Example 1
def json_error(error):
    """
    Convert exceptions to JSON responses.

    :param Exception error: an Exception to convert to JSON
    :return: a Flask JSON response
    :rtype: flask.Response
    """
    if isinstance(error, HTTPException):
        response = jsonify({
            'status': error.code,
            'message': error.description
        })
        response.status_code = error.code
    else:
        # Log the actual exception before it's gobbled up by Flask
        log.exception(error)
        status_code = 500
        message = None
        if isinstance(error, ValidationError):
            status_code = 400
        elif isinstance(error, (ServiceUnavailable, AuthError)):
            status_code = 503
            message = 'The database connection failed'
            message = 'The database connection failed'

        response = jsonify({
            'status': status_code,
            'message': message or str(error)
        })
        response.status_code = status_code
    return response
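
A minimal sketch of how json_error might be wired into a Flask app so that every uncaught exception is returned as JSON. The app object here is an assumption for illustration, not part of the original example:

from flask import Flask

app = Flask(__name__)

# Route all uncaught exceptions, including HTTPExceptions, through json_error
# so API clients always receive a JSON body instead of an HTML error page
app.register_error_handler(Exception, json_error)
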
Example 2
    def get_connection(self, db_name, force_new=False, retry=None):
        """
        Return an existing psycopg2 connection and establish it if needed.

        :param str db_name: the database name to get a connection to
        :kwarg bool force_new: forces a new database connection even if one
            already exists
        :kwarg int retry: the number of times to retry a failed connection. If
            this is not set, then the Teiid connection attempt will be
            repeated until it is successful.
        :return: a connection to Teiid
        :rtype: psycopg2 connection
        """
        if not force_new and db_name in self._connections:
            return self._connections[db_name]
        if retry is not None and retry < 1:
            raise ValueError(
                'The retry keyword must contain a value greater than 0')

        log.debug('Connecting to Teiid host {0}:{1}'.format(
            self.host, self.port))
        attempts = 0
        while True:
            attempts += 1
            try:
                conn = psycopg2.connect(database=db_name,
                                        host=self.host,
                                        port=str(self.port),
                                        user=self.username,
                                        password=self.password,
                                        connect_timeout=300)
                break
            except psycopg2.OperationalError as e:
                if retry and attempts > retry:
                    raise
                else:
                    log.exception(e)
                    log.warning(
                        'The Teiid connection failed on attempt {0}. Sleeping for 60 '
                        'seconds.'.format(attempts))
                    sleep(60)

        # Teiid does not support setting this value at all, and unless we
        # specify ISOLATION_LEVEL_AUTOCOMMIT (zero), psycopg2 will send a
        # SET command that the Teiid server doesn't understand.
        conn.set_isolation_level(0)

        self._connections[db_name] = conn
        return conn
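
Hypothetical usage of get_connection, assuming a wrapper class (here called Teiid) whose constructor stores the host, port, and credentials referenced above; the constructor arguments are assumptions:

teiid = Teiid(host='teiid.example.com', port=31000,
              username='user', password='secret')

# Give up after three failed attempts instead of retrying forever
conn = teiid.get_connection('my_db', retry=3)

# A second call with the same db_name returns the cached connection
assert teiid.get_connection('my_db') is conn
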
Example 3
def health_check():
    """Determine the health of the API by checking if the database interactions are working."""
    msg = 'Health check OK'
    status = 200
    try:
        # Just perform a simple math operation that doesn't rely on any data being present in the
        # database
        results, _ = db.cypher_query('RETURN sqrt(4)')
        assert results[0][0] == 2.0
    except:  # noqa E722
        log.exception(
            'An exception was encountered when verifying the database connection in the '
            'health check API endpoint')
        msg = 'The health check failed while verifying the database connection'
        status = 503

    return (msg, status, [('Content-Type', 'text/plain')])
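
A sketch of exposing health_check as a route; the blueprint name and URL are assumptions:

from flask import Blueprint

api = Blueprint('api', __name__)

# Flask builds the plain-text response from the (body, status, headers)
# tuple that health_check returns
api.add_url_rule('/healthcheck', view_func=health_check)
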
Example 4
    def query(self, sql, db='public', retry=None):
        """
        Send the SQL query to Teiid and return the rows as a list.

        :param str sql: the SQL query to send to the database
        :kwarg str db: the database name to query on
        :kwarg int retry: the number of times to retry a failed query. If this
            is not set, then the Teiid query will be repeated until it is
            successful.
        :return: a list of rows from Teiid. Each row is a dictionary with the
            column headers as the keys.
        :rtype: list
        """
        con = self.get_connection(db)
        cursor = con.cursor()
        if retry is not None and retry < 1:
            raise ValueError(
                'The retry keyword must contain a value greater than 0')

        if self._last_query_dt:
            now = datetime.utcnow()
            now_and_last_diff = now - self._last_query_dt
            if now_and_last_diff < timedelta(seconds=0.5):
                # Sleep for the remainder of the half-second window so that
                # consecutive queries are spaced at least 0.5 seconds apart
                sleep((timedelta(seconds=0.5) -
                       now_and_last_diff).total_seconds())

        log.debug('Querying Teiid DB "{0}" with SQL:\n{1}'.format(db, sql))

        fifteen_mins = 15 * 60
        backoff = 30
        attempts = 0
        while True:
            attempts += 1
            try:
                if attempts > 1:
                    # Restart the database connection after failed queries
                    con = self.get_connection(db, force_new=True)
                    cursor = con.cursor()
                cursor.execute(sql)
                self._last_query_dt = datetime.utcnow()
                break
            except psycopg2.OperationalError as e:
                if retry and attempts > retry:
                    raise
                else:
                    log.exception(e)
                    # Double the backoff time, but never exceed 15 minutes
                    backoff = min(backoff * 2, fifteen_mins)
                    log.warning(
                        'The Teiid query failed on attempt {0}. Sleeping for {1} seconds.'
                        .format(attempts, backoff))
                    sleep(backoff)

        data = cursor.fetchall()
        # column header names
        cols = [t[0] for t in cursor.description or []]
        log.debug('Found the following columns: {}'.format(cols))
        log.debug('Received {} rows from Teiid'.format(len(data)))
        # build a return array with all columns
        return [dict(zip(cols, row)) for row in data]
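
Hypothetical usage of query, reusing the made-up teiid instance from the sketch under Example 2; the table and column names are also made up:

rows = teiid.query('SELECT id, name FROM users', db='public', retry=2)
for row in rows:
    # Each row is a dict keyed by column name, e.g. {'id': 1, 'name': 'jdoe'}
    print(row['id'], row['name'])
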
Example 5
    def format_story_results(self, results, requested_item):
        """
        Format story results from Neo4j to the API format.

        :param list results: nodes in a story/path
        :param EstuaryStructuredNode requested_item: item requested by the user
        :return: results in API format
        :rtype: dict
        """
        data = []
        # Initialized up front so the meta section below cannot hit a
        # NameError if the requested item is missing from the results
        requested_node_index = None
        for i, node in enumerate(results):
            if node.id == requested_item.id:
                requested_node_index = i
                serialized_node = node.serialized_all
            else:
                serialized_node = node.serialized
            serialized_node['resource_type'] = node.__label__
            serialized_node['display_name'] = node.display_name
            serialized_node['timeline_timestamp'] = node.timeline_timestamp
            data.append(serialized_node)

        base_instance = BaseStoryManager()
        wait_times, total_wait_time = base_instance.get_wait_times(results)
        total_processing_time = 0
        processing_time_flag = False
        total_lead_time = 0
        try:
            processing_time, flag = base_instance.get_total_processing_time(
                results)
            total_processing_time = processing_time
            processing_time_flag = flag
        except:  # noqa E722
            log.exception('Failed to compute total processing time statistic.')
        try:
            total_lead_time = base_instance.get_total_lead_time(results)
        except:  # noqa E722
            log.exception('Failed to compute total lead time statistic.')
        formatted_results = {
            'data': data,
            'meta': {
                'story_related_nodes_forward': list(
                    self.get_sibling_nodes_count(results)),
                'story_related_nodes_backward': list(
                    self.get_sibling_nodes_count(results, reverse=True)),
                'requested_node_index': requested_node_index,
                'story_type': self.__class__.__name__[:-12].lower(),
                'wait_times': wait_times,
                'total_wait_time': total_wait_time,
                'total_processing_time': total_processing_time,
                'processing_time_flag': processing_time_flag,
                'total_lead_time': total_lead_time
            }
        }
        return formatted_results
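
The story_type value relies on every concrete manager being named <Type>StoryManager: "StoryManager" is 12 characters, so the [:-12] slice strips that suffix from the subclass name. A minimal illustration, with a hypothetical class name:

class ModuleStoryManager:
    pass

# Strip the 12-character "StoryManager" suffix and lowercase the rest
story_type = ModuleStoryManager.__name__[:-12].lower()
assert story_type == 'module'
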
Example 6
def get_resource_story(resource, uid):
    """
    Get the story of a resource from Neo4j.

    :param str resource: a resource name that maps to a neomodel class
    :param str uid: the value of the UniqueIdProperty to query with
    :return: a Flask JSON response
    :rtype: flask.Response
    :raises NotFound: if the item is not found
    :raises ValidationError: if an invalid resource was requested
    """
    fallback_resources = request.args.getlist('fallback')
    # Try all resources input by the user
    for _resource in [resource] + fallback_resources:
        item = get_neo4j_node(_resource, uid)
        # If a resource is found, we don't need to try the other resources
        if item:
            break

    if not item:
        raise NotFound('This item does not exist')

    story_manager = estuary.utils.story.BaseStoryManager.get_story_manager(
        item, current_app.config, limit=True)

    def _get_partial_story(results, reverse=False):

        if not results:
            return []

        # Assuming that if Path is the first result, then that's all we want to process
        results = [list(results[0][0].nodes)]
        # Reverse will be true when it is a backward query to preserve the story order
        if reverse:
            results = [results[0][::-1]]

        return EstuaryStructuredNode.inflate_results(results)[0]

    results = []
    if story_manager.forward_story:
        results = story_manager.set_story_labels(
            item.__label__, _get_partial_story(story_manager.forward_story))

    if story_manager.backward_story:
        backward_query_results = story_manager.set_story_labels(
            item.__label__,
            _get_partial_story(story_manager.backward_story, reverse=True),
            reverse=True)
        if backward_query_results and results:
            # Remove the last element of backward_query_results (the requested
            # resource itself) to avoid duplicating it when the forward query
            # also returned results
            backward_query_results = backward_query_results[:-1]
        results = backward_query_results + results

    # Adding the artifact itself if its story is not available
    if not results:
        base_instance = estuary.utils.story.BaseStoryManager()
        wait_times, total_wait_time = base_instance.get_wait_times([item])
        rv = {'data': [item.serialized_all], 'meta': {}}
        rv['meta']['story_related_nodes_forward'] = [0]
        rv['meta']['story_related_nodes_backward'] = [0]
        rv['meta']['requested_node_index'] = 0
        story_type = story_manager.__class__.__name__[:-12].lower()
        rv['meta']['story_type'] = story_type
        rv['meta']['wait_times'] = wait_times
        rv['meta']['total_wait_time'] = total_wait_time
        rv['meta']['total_processing_time'] = None
        rv['meta']['processing_time_flag'] = False
        rv['meta']['total_lead_time'] = 0
        try:
            total_processing_time, flag = base_instance.get_total_processing_time(
                [item])
            rv['meta']['total_processing_time'] = total_processing_time
            rv['meta']['processing_time_flag'] = flag
        except:  # noqa E722
            log.exception('Failed to compute total processing time.')
        rv['data'][0]['resource_type'] = item.__label__
        rv['data'][0]['display_name'] = item.display_name
        rv['data'][0]['timeline_timestamp'] = item.timeline_timestamp
        return jsonify(rv)

    return jsonify(story_manager.format_story_results(results, item))
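
A sketch of how the fallback resources reach request.args.getlist above, using Flask's test request context; the URL and resource names are made up:

from flask import Flask, request

app = Flask(__name__)
with app.test_request_context(
        '/story/containerbuild/123?fallback=modulebuild&fallback=build'):
    # Repeated query parameters are collected in order
    assert request.args.getlist('fallback') == ['modulebuild', 'build']
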
Example 7
def get_resource_all_stories(resource, uid):
    """
    Get all unique stories of an artifact from Neo4j.

    :param str resource: a resource name that maps to a neomodel class
    :param str uid: the value of the UniqueIdProperty to query with
    :return: a Flask JSON response
    :rtype: flask.Response
    :raises NotFound: if the item is not found
    :raises ValidationError: if an invalid resource was requested
    """
    fallback_resources = request.args.getlist('fallback')
    # Try all resources input by the user
    for _resource in [resource] + fallback_resources:
        item = get_neo4j_node(_resource, uid)
        # If a resource is found, we don't need to try the other resources
        if item:
            break

    if not item:
        raise NotFound('This item does not exist')

    story_manager = estuary.utils.story.BaseStoryManager.get_story_manager(
        item, current_app.config)

    def _get_partial_stories(results, reverse=False):

        results_list = []

        if not results:
            return results_list

        # Creating a list of lists where each list is a collection of node IDs
        # of the nodes present in that particular story path.
        # Paths are re-ordered from shortest to longest to simplify the logic
        # below
        path_nodes_id = []
        for path in reversed(results):
            path_nodes_id.append([node.id for node in path[0].nodes])

        unique_paths = []
        for index, node_set in enumerate(path_nodes_id[:-1]):
            unique = True
            for alternate_set in path_nodes_id[index + 1:]:
                # If the node_set is a subset of alternate_set,
                # we know they are the same path except the alternate_set is longer.
                # If alternate_set and node_set only have one node ID of difference,
                # we know it's the same path but from the perspective of different siblings.
                if set(node_set).issubset(set(alternate_set)) or len(
                        set(alternate_set).difference(set(node_set))) == 1:
                    unique = False
                    break
            if unique:
                # Since results is from longest to shortest, we need to get the opposite index.
                unique_paths.append(
                    results[len(path_nodes_id) - index - 1][0])
        # While traversing, the outer for loop only goes until the second to last element
        # because the inner for loop always starts one element ahead of the outer for loop.
        # Hence, all the subsets of the last element will not be added to the unique_paths
        # list as the for loops will eliminate them. So we add the last element
        # since we are sure it is unique.
        unique_paths.append(results[0][0])
        if reverse:
            unique_paths_nodes = [path.nodes[::-1] for path in unique_paths]
        else:
            unique_paths_nodes = [path.nodes for path in unique_paths]

        return EstuaryStructuredNode.inflate_results(unique_paths_nodes)

    if story_manager.forward_story:
        results_forward = _get_partial_stories(story_manager.forward_story)
    else:
        results_forward = []

    if story_manager.backward_story:
        results_backward = _get_partial_stories(story_manager.backward_story,
                                                reverse=True)
    else:
        results_backward = []

    all_results = []
    if not results_backward or not results_forward:
        if results_forward:
            results_unidir = [
                story_manager.set_story_labels(item.__label__, result)
                for result in results_forward
            ]
        else:
            results_unidir = [
                story_manager.set_story_labels(item.__label__,
                                               result,
                                               reverse=True)
                for result in results_backward
            ]

        for result in results_unidir:
            all_results.append(story_manager.format_story_results(
                result, item))

    else:
        # Combining all the backward and forward paths to generate all the possible full paths
        for result_forward in results_forward:
            for result_backward in results_backward:
                results = story_manager.set_story_labels(
                    item.__label__, result_backward, reverse=True) + \
                    story_manager.set_story_labels(item.__label__, result_forward)[1:]
                all_results.append(
                    story_manager.format_story_results(results, item))

    # Adding the artifact itself if its story is not available
    if not all_results:
        base_instance = estuary.utils.story.BaseStoryManager()
        wait_times, total_wait_time = base_instance.get_wait_times([item])
        rv = {'data': [item.serialized_all], 'meta': {}}
        rv['meta']['story_related_nodes_forward'] = [0]
        rv['meta']['story_related_nodes_backward'] = [0]
        rv['meta']['requested_node_index'] = 0
        story_type = story_manager.__class__.__name__[:-12].lower()
        rv['meta']['story_type'] = story_type
        rv['meta']['wait_times'] = wait_times
        rv['meta']['total_wait_time'] = total_wait_time
        rv['meta']['total_processing_time'] = None
        rv['meta']['processing_time_flag'] = False
        rv['meta']['total_lead_time'] = 0
        try:
            total_processing_time, flag = base_instance.get_total_processing_time(
                [item])
            rv['meta']['total_processing_time'] = total_processing_time
            rv['meta']['processing_time_flag'] = flag
        except:  # noqa E722
            log.exception('Failed to compute total processing time.')
        rv['data'][0]['resource_type'] = item.__label__
        rv['data'][0]['display_name'] = item.display_name
        rv['data'][0]['timeline_timestamp'] = item.timeline_timestamp
        all_results.append(rv)

    return jsonify(all_results)
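
A standalone sketch of the subset/sibling de-duplication rule used in _get_partial_stories, operating on plain lists of node IDs; the data is made up. As in the function, paths are ordered shortest to longest:

path_nodes_id = [
    [1, 2],        # subset of [1, 2, 3] -> dropped as a duplicate
    [1, 2, 3],     # not a subset of [1, 2, 4, 5] and more than one ID
                   # apart from it -> kept
    [1, 2, 4, 5],  # the last (longest) path is always kept
]
unique_paths = []
for index, node_set in enumerate(path_nodes_id[:-1]):
    for alternate_set in path_nodes_id[index + 1:]:
        if (set(node_set).issubset(alternate_set)
                or len(set(alternate_set).difference(node_set)) == 1):
            break
    else:
        unique_paths.append(node_set)
unique_paths.append(path_nodes_id[-1])
assert unique_paths == [[1, 2, 3], [1, 2, 4, 5]]
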
Example 8
def _get_exception_users():
    """
    Get the list of users that are explicitly whitelisted.

    If the LDAP search fails, an empty set is returned.

    :return: a set of usernames
    :rtype: set
    :raise InternalServerError: if a required configuration value is not set or the connection to
        the LDAP server fails
    """
    # Import this here so it's not required for deployments with auth disabled
    import ldap3

    base_error = '%s is not set in the server configuration'
    ldap_uri = current_app.config.get('LDAP_URI')
    if not ldap_uri:
        log.error(base_error, 'LDAP_URI')
        raise InternalServerError()

    ldap_group_dn = current_app.config.get('LDAP_EXCEPTIONS_GROUP_DN')
    if not ldap_group_dn:
        log.error(base_error, 'LDAP_EXCEPTIONS_GROUP_DN')
        raise InternalServerError()

    if ldap_uri.startswith('ldaps://'):
        ca = current_app.config['LDAP_CA_CERTIFICATE']
        log.debug('Connecting to %s using SSL and the CA %s', ldap_uri, ca)
        tls = ldap3.Tls(ca_certs_file=ca, validate=ssl.CERT_REQUIRED)
        server = ldap3.Server(ldap_uri, use_ssl=True, tls=tls)
    else:
        log.debug('Connecting to %s without SSL', ldap_uri)
        server = ldap3.Server(ldap_uri)

    connection = ldap3.Connection(server)
    try:
        connection.open()
    except ldap3.core.exceptions.LDAPSocketOpenError:
        log.exception('The connection to %s failed', ldap_uri)
        raise InternalServerError()

    membership_attr = current_app.config['LDAP_GROUP_MEMBERSHIP_ATTRIBUTE']
    log.debug('Searching for the attribute %s on %s', membership_attr,
              ldap_group_dn)
    # Set the scope to base so only the group from LDAP_EXCEPTIONS_GROUP_DN is
    # returned
    success = connection.search(ldap_group_dn,
                                '(cn=*)',
                                search_scope=ldap3.BASE,
                                attributes=[membership_attr])
    if not success:
        log.error(
            'The user exceptions list could not be determined because the search for the attribute '
            '%s on %s failed with %r',
            membership_attr,
            ldap_group_dn,
            connection.response,
        )
        return set()

    return {
        dn.split('=')[1].split(',')[0]
        for dn in connection.response[0]['attributes'][membership_attr]
    }
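
What the final set comprehension extracts from each member DN; the DN below is a made-up example:

dn = 'uid=jdoe,ou=users,dc=example,dc=com'
# Take the value of the first attribute (the username) from the DN
username = dn.split('=')[1].split(',')[0]
assert username == 'jdoe'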