def json_error(error):
    """
    Convert exceptions to JSON responses.

    :param Exception error: an Exception to convert to JSON
    :return: a Flask JSON response
    :rtype: flask.Response
    """
    if isinstance(error, HTTPException):
        response = jsonify({
            'status': error.code,
            'message': error.description
        })
        response.status_code = error.code
    else:
        # Log the actual exception before it's gobbled up by Flask
        log.exception(error)
        status_code = 500
        message = None
        if isinstance(error, ValidationError):
            status_code = 400
        elif isinstance(error, (ServiceUnavailable, AuthError)):
            status_code = 503
            message = 'The database connection failed'
        response = jsonify({
            'status': status_code,
            'message': message or str(error)
        })
        response.status_code = status_code
    return response
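# A minimal sketch of wiring json_error up as a catch-all Flask error handler so
# every unhandled exception comes back as JSON. The app object is created here
# purely for illustration; register_error_handler is standard Flask API.
from flask import Flask

app = Flask(__name__)
app.register_error_handler(HTTPException, json_error)
app.register_error_handler(Exception, json_error)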
def get_connection(self, db_name, force_new=False, retry=None):
    """
    Return an existing psycopg2 connection and establish it if needed.

    :param str db_name: the database name to get a connection to
    :kwarg bool force_new: forces a new database connection even if one already exists
    :kwarg int retry: the number of times to retry a failed connection. If this is not set,
        then the Teiid connection attempt will be repeated until it is successful.
    :return: a connection to Teiid
    :rtype: psycopg2 connection
    """
    if not force_new and db_name in self._connections:
        return self._connections[db_name]

    if retry is not None and retry < 1:
        raise ValueError('The retry keyword must contain a value greater than 0')

    log.debug('Connecting to Teiid host {0}:{1}'.format(self.host, self.port))
    attempts = 0
    while True:
        attempts += 1
        try:
            conn = psycopg2.connect(
                database=db_name,
                host=self.host,
                port=str(self.port),
                user=self.username,
                password=self.password,
                connect_timeout=300
            )
            break
        except psycopg2.OperationalError as e:
            if retry and attempts > retry:
                raise
            log.exception(e)
            log.warning(
                'The Teiid connection failed on attempt {0}. Sleeping for 60 seconds.'
                .format(attempts))
            sleep(60)

    # Teiid does not support setting this value at all, and unless we specify
    # ISOLATION_LEVEL_AUTOCOMMIT (zero), psycopg2 will send a SET command that the
    # Teiid server doesn't understand.
    conn.set_isolation_level(0)
    self._connections[db_name] = conn
    return conn
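# A hypothetical usage sketch for get_connection. "TeiidConnector" is an assumed
# name for the class this method lives on (inferred from the host/port/username/
# password attributes it references), and its constructor signature is invented.
connector = TeiidConnector(host='teiid.example.com', port=5432,
                           username='query_user', password='secret')

# Give up after three failed connection attempts instead of retrying forever
conn = connector.get_connection('my_db', retry=3)

# Later calls reuse the cached connection unless force_new=True is passed
assert connector.get_connection('my_db') is conn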
def health_check():
    """Determine the health of the API by checking if the database interactions are working."""
    msg = 'Health check OK'
    status = 200
    try:
        # Just perform a simple math operation that doesn't rely on any data being present in the
        # database
        results, _ = db.cypher_query('RETURN sqrt(4)')
        assert results[0][0] == 2.0
    except:  # noqa E722
        log.exception(
            'An exception was encountered when verifying the database connection in the '
            'health check API endpoint')
        msg = 'The health check failed while verifying the database connection'
        status = 503
    return (msg, status, [('Content-Type', 'text/plain')])
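# A minimal sketch of exposing health_check as a Flask endpoint. The '/healthcheck'
# URL path is an assumption for illustration; the function already returns a
# (body, status, headers) tuple, which Flask converts into a text/plain response.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/healthcheck', view_func=health_check)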
def query(self, sql, db='public', retry=None):
    """
    Send the SQL query to Teiid and return the rows as a list.

    :param str sql: the SQL query to send to the database
    :kwarg str db: the database name to query on
    :kwarg int retry: the number of times to retry a failed query. If this is not set, then the
        Teiid query will be repeated until it is successful.
    :return: a list of rows from Teiid. Each row is a dictionary with the column headers as
        the keys.
    :rtype: list
    """
    con = self.get_connection(db)
    cursor = con.cursor()

    if retry is not None and retry < 1:
        raise ValueError('The retry keyword must contain a value greater than 0')

    if self._last_query_dt:
        now = datetime.utcnow()
        now_and_last_diff = now - self._last_query_dt
        if now_and_last_diff < timedelta(seconds=0.5):
            # Sleep for the remainder of the half second so that consecutive queries
            # are spaced at least 0.5 seconds apart
            sleep(0.5 - now_and_last_diff.total_seconds())

    log.debug('Querying Teiid DB "{0}" with SQL:\n{1}'.format(db, sql))

    fifteen_mins = 15 * 60
    backoff = 30
    attempts = 0
    while True:
        attempts += 1
        try:
            if attempts > 1:
                # Restart the database connection after failed queries
                con = self.get_connection(db, force_new=True)
                cursor = con.cursor()
            cursor.execute(sql)
            self._last_query_dt = datetime.utcnow()
            break
        except psycopg2.OperationalError as e:
            if retry and attempts > retry:
                raise
            log.exception(e)
            if backoff < fifteen_mins:
                # Double the backoff time
                backoff = backoff * 2
            elif backoff > fifteen_mins:
                # Cap the backoff time at 15 minutes
                backoff = fifteen_mins
            log.warning(
                'The Teiid query failed on attempt {0}. Sleeping for {1} seconds.'
                .format(attempts, backoff))
            sleep(backoff)

    data = cursor.fetchall()
    # column header names
    cols = [t[0] for t in cursor.description or []]
    log.debug('Found the following columns: {}'.format(cols))
    log.debug('Received {} rows from Teiid'.format(len(data)))
    # build a return array with all columns
    return [dict(zip(cols, row)) for row in data]
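# A hypothetical usage sketch for query(). "connector" is the assumed TeiidConnector
# instance from the earlier sketch, and the table name is invented for illustration.
rows = connector.query('SELECT id, name FROM users', db='my_db', retry=2)

# Each row comes back as a dict keyed by column header, for example:
# [{'id': 1, 'name': 'alice'}, {'id': 2, 'name': 'bob'}]
for row in rows:
    print(row['id'], row['name'])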
def format_story_results(self, results, requested_item):
    """
    Format story results from Neo4j to the API format.

    :param list results: nodes in a story/path
    :param EstuaryStructuredNode requested_item: item requested by the user
    :return: results in API format
    :rtype: dict
    """
    data = []
    # Guard against the requested item not appearing in results, which would
    # otherwise raise a NameError when building the meta section below
    requested_node_index = None
    for i, node in enumerate(results):
        if node.id == requested_item.id:
            requested_node_index = i
            serialized_node = node.serialized_all
        else:
            serialized_node = node.serialized
        serialized_node['resource_type'] = node.__label__
        serialized_node['display_name'] = node.display_name
        serialized_node['timeline_timestamp'] = node.timeline_timestamp
        data.append(serialized_node)

    base_instance = BaseStoryManager()
    wait_times, total_wait_time = base_instance.get_wait_times(results)

    total_processing_time = 0
    processing_time_flag = False
    total_lead_time = 0
    try:
        total_processing_time, processing_time_flag = \
            base_instance.get_total_processing_time(results)
    except:  # noqa E722
        log.exception('Failed to compute the total processing time statistic.')
    try:
        total_lead_time = base_instance.get_total_lead_time(results)
    except:  # noqa E722
        log.exception('Failed to compute the total lead time statistic.')

    formatted_results = {
        'data': data,
        'meta': {
            'story_related_nodes_forward': list(self.get_sibling_nodes_count(results)),
            'story_related_nodes_backward': list(
                self.get_sibling_nodes_count(results, reverse=True)),
            'requested_node_index': requested_node_index,
            'story_type': self.__class__.__name__[:-12].lower(),
            'wait_times': wait_times,
            'total_wait_time': total_wait_time,
            'total_processing_time': total_processing_time,
            'processing_time_flag': processing_time_flag,
            'total_lead_time': total_lead_time
        }
    }
    return formatted_results
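# The shape of the dict returned by format_story_results, reconstructed from the code
# above; the values are illustrative placeholders, not real output. Note that
# 'story_type' is the class name with the 12-character 'StoryManager' suffix stripped
# and lowercased (e.g. ContainerStoryManager -> 'container').
example_story_result = {
    'data': [],  # serialized nodes, each with 'resource_type', 'display_name'
                 # and 'timeline_timestamp' added
    'meta': {
        'story_related_nodes_forward': [0, 2],
        'story_related_nodes_backward': [1, 0],
        'requested_node_index': 0,
        'story_type': 'container',
        'wait_times': [0],
        'total_wait_time': 0,
        'total_processing_time': 0,
        'processing_time_flag': False,
        'total_lead_time': 0
    }
}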
def get_resource_story(resource, uid):
    """
    Get the story of a resource from Neo4j.

    :param str resource: a resource name that maps to a neomodel class
    :param str uid: the value of the UniqueIdProperty to query with
    :return: a Flask JSON response
    :rtype: flask.Response
    :raises NotFound: if the item is not found
    :raises ValidationError: if an invalid resource was requested
    """
    fallback_resources = request.args.getlist('fallback')
    # Try all resources input by the user
    for _resource in [resource] + fallback_resources:
        item = get_neo4j_node(_resource, uid)
        # If a resource is found, we don't need to try the other resources
        if item:
            break

    if not item:
        raise NotFound('This item does not exist')

    story_manager = estuary.utils.story.BaseStoryManager.get_story_manager(
        item, current_app.config, limit=True)

    def _get_partial_story(results, reverse=False):
        if not results:
            return []
        # Assuming that if a path is the first result, then that's all we want to process
        results = [list(results[0][0].nodes)]
        # reverse is True for backward queries, to preserve the story order
        if reverse:
            results = [results[0][::-1]]
        return EstuaryStructuredNode.inflate_results(results)[0]

    results = []
    if story_manager.forward_story:
        results = story_manager.set_story_labels(
            item.__label__, _get_partial_story(story_manager.forward_story))

    if story_manager.backward_story:
        backward_query_results = story_manager.set_story_labels(
            item.__label__,
            _get_partial_story(story_manager.backward_story, reverse=True),
            reverse=True)
        if backward_query_results and results:
            # Drop the last element of backward_query_results to avoid duplicating the
            # requested resource, since the forward results already start with it
            backward_query_results = backward_query_results[:-1]
        results = backward_query_results + results

    # Add the artifact itself if its story is not available
    if not results:
        base_instance = estuary.utils.story.BaseStoryManager()
        wait_times, total_wait_time = base_instance.get_wait_times([item])

        rv = {'data': [item.serialized_all], 'meta': {}}
        rv['meta']['story_related_nodes_forward'] = [0]
        rv['meta']['story_related_nodes_backward'] = [0]
        rv['meta']['requested_node_index'] = 0
        rv['meta']['story_type'] = story_manager.__class__.__name__[:-12].lower()
        rv['meta']['wait_times'] = wait_times
        rv['meta']['total_wait_time'] = total_wait_time
        rv['meta']['total_processing_time'] = None
        rv['meta']['processing_time_flag'] = False
        rv['meta']['total_lead_time'] = 0
        try:
            total_processing_time, flag = base_instance.get_total_processing_time([item])
            rv['meta']['total_processing_time'] = total_processing_time
            rv['meta']['processing_time_flag'] = flag
        except:  # noqa E722
            log.exception('Failed to compute the total processing time.')
        rv['data'][0]['resource_type'] = item.__label__
        rv['data'][0]['display_name'] = item.display_name
        rv['data'][0]['timeline_timestamp'] = item.timeline_timestamp
        return jsonify(rv)

    return jsonify(story_manager.format_story_results(results, item))
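# A hypothetical route registration for get_resource_story. The URL pattern is an
# assumption and is not confirmed by the code above. The 'fallback' resources arrive
# as a query parameter, e.g. GET /story/containeradvisory/12345?fallback=advisory
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/story/<resource>/<uid>', view_func=get_resource_story)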
def get_resource_all_stories(resource, uid):
    """
    Get all unique stories of an artifact from Neo4j.

    :param str resource: a resource name that maps to a neomodel class
    :param str uid: the value of the UniqueIdProperty to query with
    :return: a Flask JSON response
    :rtype: flask.Response
    :raises NotFound: if the item is not found
    :raises ValidationError: if an invalid resource was requested
    """
    fallback_resources = request.args.getlist('fallback')
    # Try all resources input by the user
    for _resource in [resource] + fallback_resources:
        item = get_neo4j_node(_resource, uid)
        # If a resource is found, we don't need to try the other resources
        if item:
            break

    if not item:
        raise NotFound('This item does not exist')

    story_manager = estuary.utils.story.BaseStoryManager.get_story_manager(
        item, current_app.config)

    def _get_partial_stories(results, reverse=False):
        results_list = []
        if not results:
            return results_list

        # Create a list of lists where each inner list contains the node IDs of the nodes
        # in that particular story path. The paths are iterated in reverse (shortest to
        # longest) to simplify the deduplication logic below.
        path_nodes_id = []
        for path in reversed(results):
            path_nodes_id.append([node.id for node in path[0].nodes])

        unique_paths = []
        for index, node_set in enumerate(path_nodes_id[:-1]):
            unique = True
            for alternate_set in path_nodes_id[index + 1:]:
                # If node_set is a subset of alternate_set, they are the same path except
                # that alternate_set is longer. If alternate_set and node_set differ by only
                # one node ID, it's the same path seen from the perspective of different
                # siblings.
                if set(node_set).issubset(set(alternate_set)) or len(
                        set(alternate_set).difference(set(node_set))) == 1:
                    unique = False
                    break
            if unique:
                # Since results is ordered from longest to shortest, we need the opposite index
                unique_paths.append(results[(len(path_nodes_id) - index) - 1][0])

        # The outer for loop only goes up to the second-to-last element because the inner
        # for loop always starts one element ahead of it. Any path that is a subset of the
        # last (longest) path has already been eliminated, so the last path is guaranteed
        # to be unique and is always added.
        unique_paths.append(results[0][0])

        if reverse:
            unique_paths_nodes = [path.nodes[::-1] for path in unique_paths]
        else:
            unique_paths_nodes = [path.nodes for path in unique_paths]

        return EstuaryStructuredNode.inflate_results(unique_paths_nodes)

    if story_manager.forward_story:
        results_forward = _get_partial_stories(story_manager.forward_story)
    else:
        results_forward = []

    if story_manager.backward_story:
        results_backward = _get_partial_stories(story_manager.backward_story, reverse=True)
    else:
        results_backward = []

    all_results = []
    if not results_backward or not results_forward:
        if results_forward:
            results_unidir = [
                story_manager.set_story_labels(item.__label__, result)
                for result in results_forward
            ]
        else:
            results_unidir = [
                story_manager.set_story_labels(item.__label__, result, reverse=True)
                for result in results_backward
            ]

        for result in results_unidir:
            all_results.append(story_manager.format_story_results(result, item))
    else:
        # Combine all the backward and forward paths to generate all the possible full paths
        for result_forward in results_forward:
            for result_backward in results_backward:
                results = story_manager.set_story_labels(
                    item.__label__, result_backward, reverse=True) + \
                    story_manager.set_story_labels(item.__label__, result_forward)[1:]
                all_results.append(story_manager.format_story_results(results, item))

    # Add the artifact itself if its story is not available
    if not all_results:
        base_instance = estuary.utils.story.BaseStoryManager()
        wait_times, total_wait_time = base_instance.get_wait_times([item])

        rv = {'data': [item.serialized_all], 'meta': {}}
        rv['meta']['story_related_nodes_forward'] = [0]
        rv['meta']['story_related_nodes_backward'] = [0]
        rv['meta']['requested_node_index'] = 0
        rv['meta']['story_type'] = story_manager.__class__.__name__[:-12].lower()
        rv['meta']['wait_times'] = wait_times
        rv['meta']['total_wait_time'] = total_wait_time
        rv['meta']['total_processing_time'] = None
        rv['meta']['processing_time_flag'] = False
        rv['meta']['total_lead_time'] = 0
        try:
            total_processing_time, flag = base_instance.get_total_processing_time([item])
            rv['meta']['total_processing_time'] = total_processing_time
            rv['meta']['processing_time_flag'] = flag
        except:  # noqa E722
            log.exception('Failed to compute the total processing time.')
        rv['data'][0]['resource_type'] = item.__label__
        rv['data'][0]['display_name'] = item.display_name
        rv['data'][0]['timeline_timestamp'] = item.timeline_timestamp
        all_results.append(rv)

    return jsonify(all_results)
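# A self-contained sketch of the path-deduplication rule used in _get_partial_stories,
# operating on plain lists of node IDs instead of Neo4j path objects so the logic is
# easy to verify. paths is assumed to be ordered shortest to longest, mirroring the
# reversed() iteration above.
def unique_id_paths(paths):
    unique = []
    for index, node_set in enumerate(paths[:-1]):
        for alternate_set in paths[index + 1:]:
            # Drop the path if a longer path contains it, or if it differs from
            # another path by a single node (a sibling's view of the same story)
            if set(node_set).issubset(set(alternate_set)) or \
                    len(set(alternate_set).difference(set(node_set))) == 1:
                break
        else:
            unique.append(node_set)
    # The longest path survives every comparison above, so it is always kept
    unique.append(paths[-1])
    return unique

# [1, 2] is a subset of [1, 2, 3], and [1, 2, 4] differs from [1, 2, 3] by one ID,
# so only [1, 2, 3] and [5, 6, 7] remain
print(unique_id_paths([[1, 2], [1, 2, 4], [1, 2, 3], [5, 6, 7]]))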
def _get_exception_users():
    """
    Get the set of users that are explicitly whitelisted.

    If the LDAP search fails, an empty set is returned.

    :return: a set of usernames
    :rtype: set
    :raises InternalServerError: if a required configuration value is not set or the connection
        to the LDAP server fails
    """
    # Import this here so it's not required for deployments with auth disabled
    import ldap3

    base_error = '%s is not set in the server configuration'
    ldap_uri = current_app.config.get('LDAP_URI')
    if not ldap_uri:
        log.error(base_error, 'LDAP_URI')
        raise InternalServerError()

    ldap_group_dn = current_app.config.get('LDAP_EXCEPTIONS_GROUP_DN')
    if not ldap_group_dn:
        log.error(base_error, 'LDAP_EXCEPTIONS_GROUP_DN')
        raise InternalServerError()

    if ldap_uri.startswith('ldaps://'):
        ca = current_app.config['LDAP_CA_CERTIFICATE']
        log.debug('Connecting to %s using SSL and the CA %s', ldap_uri, ca)
        tls = ldap3.Tls(ca_certs_file=ca, validate=ssl.CERT_REQUIRED)
        server = ldap3.Server(ldap_uri, use_ssl=True, tls=tls)
    else:
        log.debug('Connecting to %s without SSL', ldap_uri)
        server = ldap3.Server(ldap_uri)

    connection = ldap3.Connection(server)
    try:
        connection.open()
    except ldap3.core.exceptions.LDAPSocketOpenError:
        log.exception('The connection to %s failed', ldap_uri)
        raise InternalServerError()

    membership_attr = current_app.config['LDAP_GROUP_MEMBERSHIP_ATTRIBUTE']
    log.debug('Searching for the attribute %s on %s', membership_attr, ldap_group_dn)
    # Set the scope to base so only the group from LDAP_EXCEPTIONS_GROUP_DN is returned
    success = connection.search(ldap_group_dn, '(cn=*)', search_scope=ldap3.BASE,
                                attributes=[membership_attr])
    if not success:
        log.error(
            'The user exceptions list could not be determined because the search for the '
            'attribute %s on %s failed with %r',
            membership_attr, ldap_group_dn, connection.response,
        )
        return set()

    return {
        dn.split('=')[1].split(',')[0]
        for dn in connection.response[0]['attributes'][membership_attr]
    }
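# A quick illustration of the DN parsing at the end of _get_exception_users: the
# username is the value of the first RDN. The DN below is a made-up example.
dn = 'uid=jdoe,ou=users,dc=example,dc=com'
username = dn.split('=')[1].split(',')[0]
assert username == 'jdoe'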