def processnode(node_string, startstate_name, endstate_name, nodeprocess_func,
                nodeerror_func, mark_node_active, *nodeprocess_args):
    """
    <Purpose>
      First check the current state of the node, to ensure that it is in
      the correct state. Then run nodeprocess_func on the node to process
      it. Finally, set the new state of the node once it has been
      processed properly.

    <Arguments>
      node_string - the node itself as gotten from advertise_lookup, most
                    likely an ip:port address or NAT:ip.
      startstate_name - the state in which the node is expected to be now.
      endstate_name - the state we want to transition the node to.
      nodeprocess_func - the function that is used to process the node. It
                    is called as nodeprocess_func(node_string, node_info,
                    database_nodeobject, *nodeprocess_args).
      nodeerror_func - the function intended to be run if there is an
                    error. NOTE: this function does not currently call it.
      mark_node_active - determines whether the node is marked active
                    (True) or inactive (False) in the database when the
                    state actually changes.
      nodeprocess_args - the extra arguments for nodeprocess_func.

    <Exceptions>
      Problems detected before the node lock is acquired are propagated to
      the caller:
        KeyError - if startstate_name or endstate_name is not a key of
                   transition_state_keys.
        NodeError - if node_string is empty.
        NodemanagerCommunicationError - if the initial get_node_info call
                   fails.
      Anything raised by split_node_string, _do_rsa_publickey_to_string or
      acquire_node_lock is propagated as well.

      Once the lock is held, NodeError, NodemanagerCommunicationError,
      DatabaseError and UnexpectedError are logged and turned into a False
      return value. Other exception types propagate after the lock has
      been released.

    <Side Effects>
      The database may get modified. A node lock is held for the duration
      of the processing and is always released.

    <Return>
      True if processing the node was successful, False if one of the
      handled exceptions listed above occurred while the lock was held.
    """

    # The pubkeys for the start and end states.
    startstate_pubkey = transition_state_keys[startstate_name]
    endstate_pubkey = transition_state_keys[endstate_name]

    # Make sure that the node is not just an empty string.
    # The node_string could be bad due to a bad advertise_lookup.
    if not node_string:
        raise NodeError("An empty node was passed down to processnode() with startstate: "+ startstate_name)

    # Note that the first portion of the node might be an ip or a NAT string.
    (ip_or_nat_string, port_num) = split_node_string(node_string)

    log("Starting to process node: "+node_string)

    # Retrieve the node information once up front, only to learn the nodeID
    # that the lock has to be taken on. A NodemanagerCommunicationError raised
    # here is deliberately left to propagate to the caller.
    node_info = nodemanager.get_node_info(ip_or_nat_string, port_num)

    # Extract the nodeID in order to acquire a lock.
    nodeID = _do_rsa_publickey_to_string(node_info['nodekey'])

    # Acquire a node lock.
    lockserver_handle = acquire_node_lock(nodeID)

    try:
        # Fetch the node information again now that we hold the lock.
        log("Retrieving node vesseldict for node: "+node_string)
        node_info = nodemanager.get_node_info(ip_or_nat_string, port_num)
        log("Successfully retrieved node_info for node: " + node_string)

        # If the nodes are in acceptdonationstate, update/check the database
        # to ensure that it matches the node information.
        if startstate_pubkey == transition_state_keys['acceptdonation']:
            add_new_node_to_db(node_string, node_info)
            log("Successfully added node to the database for node" + node_string)
            log("The database should reflect the node information accurately")

        # Get the database object.
        database_nodeobject = maindb.get_node(nodeID)

        # Retrieve the node state.
        current_node_state_pubkey = get_node_state(node_info, database_nodeobject)

        # Make sure that the node is in the right state.
        if current_node_state_pubkey != startstate_pubkey:
            log("The node is not in the right transition state. NodeID is: " +
                nodeID + " Current state is " + str(current_node_state_pubkey) +
                ". Should be in state " + str(startstate_pubkey))
            raise NodeError("Node is no longer in the right state!")

        # Run the processnode function that was passed originally from the
        # transition scripts. Only Exception subclasses are converted into a
        # NodeError: KeyboardInterrupt and SystemExit must not be swallowed
        # and reported as a mere node failure.
        try:
            nodeprocess_func(node_string, node_info, database_nodeobject, *nodeprocess_args)
        except Exception:
            log("Failed to process node: " + node_string)
            raise NodeError("Could not process node: " + node_string + traceback.format_exc())

        # Set the node state now that the node has been processed.
        if startstate_pubkey != endstate_pubkey:
            log("Trying to set new state for node: " + node_string)
            set_node_state(database_nodeobject, endstate_pubkey)

            # If the mark_node_active bit was set, then we want to mark the
            # node as active in the database. Until the node is marked active,
            # the user may not get credited. This is usually set at the final
            # stage when all vessels have been split.
            if mark_node_active:
                maindb.mark_node_as_active(database_nodeobject)
            else:
                maindb.mark_node_as_inactive(database_nodeobject)

            log("Finished setting new state " + endstate_name + " on node " + node_string)
        else:
            log("Not setting node state: start state and end state are the same.")

    except NodeError:
        log("Node data problem when processing node: " + node_string + traceback.format_exc())
        return False
    except NodemanagerCommunicationError:
        log("Node communication failed while processing node: " + node_string)
        return False
    except DatabaseError:
        log("Ran into problem accessing database while processing node: " + node_string)
        return False
    except UnexpectedError:
        log("Ran into some unexpected error while processing node: " + node_string)
        return False
    finally:
        # Runs on success, on the handled failures above and on any other
        # exception, so the node lock is never left held.
        release_node_lock(lockserver_handle, nodeID)

    # Everything worked out fine.
    return True
def check_node(node, readonly=True, lockserver_handle=None):
    """
    <Purpose>
      Check a node for problems. This will try to contact the node and will
      compare the information retrieved from the node to the information we
      have in our database. Every problem found is passed to
      _report_node_problem.

    <Arguments>
      node
        The Node object of the node to be checked.
      readonly
        False if the function should mark the node in the database as
        inactive or broken (and vessels released) when appropriate, True if
        it should never change anything in the database. Default is True.
      lockserver_handle
        If an existing lockserver handle should be used for lock
        acquisitions, it should be provided here. Otherwise, a new
        lockserver handle will be used for the duration of this function
        call. Note: no locking is done if readonly is True. That is, if
        there is no reason to lock a node, there is no reason to provide a
        lockserver_handle.

    <Exceptions>
      None are raised deliberately. NodemanagerCommunicationError and
      ValueError from key conversion are handled and reported as node
      problems; anything else raised by the database, lockserver or helper
      calls propagates after the lock has been released.

    <Side Effects>
      If readonly is False, the database may be updated appropriately based
      on what the function sees. No changes are ever directly made to the
      nodes through nodemanager communication regardless of the setting of
      readonly. However, other scripts might take action based on database
      changes (e.g. released vessel will quickly be cleaned up by the
      backend daemon).

    <Returns>
      None
    """
    # NOTE(review): a second definition of check_node appears later in this
    # file; being later, it is the one bound to the name at import time.

    # Remember the identifier the lock is taken on. `node` is rebound to a
    # fresh database record below, and the unlock must use exactly the
    # identifier that was locked.
    node_identifier = node.node_identifier

    must_destroy_lockserver_handle = False
    node_is_locked = False

    if not readonly and lockserver_handle is None:
        must_destroy_lockserver_handle = True
        lockserver_handle = lockserver.create_lockserver_handle()

    # Be sure to release the node lock (and destroy a handle we created), if
    # we are locking the node. Locking happens inside the try so that a
    # failing lock_node call does not leak a handle created above.
    try:
        if not readonly:
            lockserver.lock_node(lockserver_handle, node_identifier)
            node_is_locked = True

        # Get a fresh node record from the database. It might have changed
        # before we obtained the lock.
        node = maindb.get_node(node_identifier)

        # The code beyond this point would be a good candidate for splitting
        # out into a few smaller functions for readability.

        donation_list = maindb.get_donations_from_node(node)
        if len(donation_list) == 0:
            _report_node_problem(node, "The node has no corresponding donation records. " +
                                 "Not marking node broken, though.")

        try:
            nodeinfo = nodemanager.get_node_info(node.last_known_ip, node.last_known_port)
        except NodemanagerCommunicationError:
            _record_node_communication_failure(readonly, node)
            _report_node_problem(node, "Can't communicate with node.")
            return

        try:
            nodekey_str = rsa_publickey_to_string(nodeinfo["nodekey"])
        except ValueError:
            _mark_node_broken(readonly, node)
            _report_node_problem(node, "Invalid nodekey: " + str(nodeinfo["nodekey"]))
            return

        # Check that the nodeid matches. If it doesn't, it probably means
        # seattle was reinstalled or there is a different system at that
        # address now.
        if node.node_identifier != nodekey_str:
            _mark_node_inactive(readonly, node)
            _report_node_problem(node, "Wrong node identifier, the node reports: " +
                                 str(nodeinfo["nodekey"]))
            # Not much more worth checking in this case.
            return

        # Check that the database thinks it knows the extra vessel name.
        if node.extra_vessel_name == "":
            _mark_node_broken(readonly, node)
            _report_node_problem(node, "No extra_vessel_name in the database.")
            # Not much more worth checking in this case.
            return

        # Check that a vessel by the name of extra_vessel_name exists on the
        # node.
        if node.extra_vessel_name not in nodeinfo["vessels"]:
            _mark_node_broken(readonly, node)
            _report_node_problem(node, "The extra_vessel_name in the database is a vessel name that doesn't exist on the node.")
            # Not much more worth checking in this case.
            return

        extravesselinfo = nodeinfo["vessels"][node.extra_vessel_name]
        vessels_in_db = maindb.get_vessels_on_node(node)

        if len(extravesselinfo["userkeys"]) != 1:
            _mark_node_broken(readonly, node)
            _report_node_problem(node, "The extra vessel '" + node.extra_vessel_name +
                                 "' doesn't have 1 user key, it has " +
                                 str(len(extravesselinfo["userkeys"])))
        else:
            # Figure out which state the node is in according to the state
            # key. If several state names share the key, the last one
            # iterated wins.
            state_key = extravesselinfo["userkeys"][0]
            recognized_state_name = ""
            for statename in statekeys:
                if statekeys[statename] == state_key:
                    recognized_state_name = statename

            if not recognized_state_name:
                _mark_node_broken(readonly, node)
                _report_node_problem(node, "The extra vessel '" + node.extra_vessel_name +
                                     "' doesn't have a recognized user/state key")

            # States in which we expect to have vessels for the node in the
            # database.
            in_vessel_state = recognized_state_name in ("onepercentmanyevents", "twopercent")

            if len(vessels_in_db) == 0:
                if in_vessel_state:
                    # We don't mark it as broken because it may be in
                    # transition by a transition script away from
                    # onepercentmanyevents. If the vessels in the db have been
                    # deleted first but the state key hasn't been changed yet,
                    # we might hit this. Also, it's not so bad to have it not
                    # be marked as broken when it's like this, as it has no
                    # vessels we know about, anyways, so we're not going to be
                    # giving questionable resources to users because of it.
                    _report_node_problem(node, "The node is in the " + recognized_state_name + " state " +
                                         "but we don't have any vessels for it in the database.")
            else:
                if not in_vessel_state:
                    # We don't mark it as broken because it may be in
                    # transition by a transition script. Also, we may have
                    # other states in the future besides onepercentmanyevents
                    # that have vessels. We don't want to make all of those
                    # nodes inactive if it's just an issue of someone forgot
                    # to update this script.
                    _report_node_problem(node, "The node is in the '" + recognized_state_name +
                                         "' state but we have vessels for it in the database.")

        known_vessel_names = [vessel.name for vessel in vessels_in_db]

        # Look for vessels on the node with our node ownerkey which aren't in
        # our database.
        for actualvesselname in nodeinfo["vessels"]:
            vessel_ownerkey = nodeinfo["vessels"][actualvesselname]["ownerkey"]

            try:
                vessel_ownerkey_str = rsa_publickey_to_string(vessel_ownerkey)
            except ValueError:
                # At this point we aren't sure it's our node, but let's assume
                # that if there's an invalid key then the node is broken,
                # period.
                _mark_node_broken(readonly, node)
                _report_node_problem(node, "Invalid vessel ownerkey: " + str(vessel_ownerkey))
                return

            if vessel_ownerkey_str == node.owner_pubkey:
                if actualvesselname not in known_vessel_names and actualvesselname != node.extra_vessel_name:
                    _mark_node_broken(readonly, node)
                    _report_node_problem(node, "The vessel '" + actualvesselname + "' exists on the node " +
                                         "with the ownerkey for the node, but it's not in our vessels table.")

        # Do some checking on each vessel we have in our database.
        for vessel in vessels_in_db:

            # Check that the vessel in our database actually exists on the
            # node.
            if vessel.name not in nodeinfo["vessels"]:
                _mark_node_broken(readonly, node)
                _report_node_problem(node, "The vessel '" + vessel.name + "' in our db doesn't exist on the node.")
                continue

            vesselinfo = nodeinfo["vessels"][vessel.name]

            try:
                vessel_ownerkey_str = rsa_publickey_to_string(vesselinfo["ownerkey"])
            except ValueError:
                _mark_node_broken(readonly, node)
                # Report the key of the vessel being checked here, not the
                # variable left over from the loop over the node's vessels.
                _report_node_problem(node, "Invalid vessel ownerkey on a vessel in our db: " +
                                     str(vesselinfo["ownerkey"]))
                return

            # Check that the owner key for the vessel is what we have for the
            # node's owner key in our database.
            if node.owner_pubkey != vessel_ownerkey_str:
                _mark_node_broken(readonly, node)
                _report_node_problem(node, "The vessel '" + vessel.name + "' doesn't have the ownerkey we use for the node.")

            if not vesselinfo["advertise"]:
                _mark_node_broken(readonly, node)
                _report_node_problem(node, "The vessel '" + vessel.name + "' isn't advertising.")

            # We're only concerned with non-dirty vessels as the backend
            # daemon should be working on cleaning up dirty vessels.
            if not vessel.is_dirty:
                # Check that the user keys that have access are the ones that
                # should have access.
                users_with_access = maindb.get_users_with_access_to_vessel(vessel)

                if len(users_with_access) != len(vesselinfo["userkeys"]):
                    _release_vessel(readonly, vessel)
                    _report_node_problem(node, "The vessel '" + vessel.name + "' reports " +
                                         str(len(vesselinfo["userkeys"])) + " user keys, but we expected " +
                                         str(len(users_with_access)))

                for user in users_with_access:
                    if rsa_string_to_publickey(user.user_pubkey) not in vesselinfo["userkeys"]:
                        _release_vessel(readonly, vessel)
                        _report_node_problem(node, "The vessel '" + vessel.name +
                                             "' doesn't have the userkey for user " + user.username + ".")

    finally:
        # We only hold a lock if readonly was False and lock_node succeeded.
        if node_is_locked:
            lockserver.unlock_node(lockserver_handle, node_identifier)

        # Destroy the lockserver handle if we created it ourselves.
        if must_destroy_lockserver_handle:
            lockserver.destroy_lockserver_handle(lockserver_handle)
def check_node(node, readonly=True, lockserver_handle=None):
    """
    <Purpose>
      Check a node for problems. This will try to contact the node and will
      compare the information retrieved from the node to the information we
      have in our database. Every problem found is passed to
      _report_node_problem.

    <Arguments>
      node
        The Node object of the node to be checked.
      readonly
        False if the function should mark the node in the database as
        inactive or broken (and vessels released) when appropriate, True if
        it should never change anything in the database. Default is True.
      lockserver_handle
        If an existing lockserver handle should be used for lock
        acquisitions, it should be provided here. Otherwise, a new
        lockserver handle will be used for the duration of this function
        call. Note: no locking is done if readonly is True. That is, if
        there is no reason to lock a node, there is no reason to provide a
        lockserver_handle.

    <Exceptions>
      None are raised deliberately. NodemanagerCommunicationError and
      ValueError from key conversion are handled and reported as node
      problems; anything else raised by the database, lockserver or helper
      calls propagates after the lock has been released.

    <Side Effects>
      If readonly is False, the database may be updated appropriately based
      on what the function sees. No changes are ever directly made to the
      nodes through nodemanager communication regardless of the setting of
      readonly. However, other scripts might take action based on database
      changes (e.g. released vessel will quickly be cleaned up by the
      backend daemon).

    <Returns>
      None
    """
    # NOTE(review): an earlier definition of check_node with the same logic
    # exists in this file; this later one replaces it at import time.

    # Remember the identifier the lock is taken on. `node` is rebound to a
    # fresh database record below, and the unlock must use exactly the
    # identifier that was locked.
    node_identifier = node.node_identifier

    must_destroy_lockserver_handle = False
    node_is_locked = False

    if not readonly and lockserver_handle is None:
        must_destroy_lockserver_handle = True
        lockserver_handle = lockserver.create_lockserver_handle()

    # Be sure to release the node lock (and destroy a handle we created), if
    # we are locking the node. Locking happens inside the try so that a
    # failing lock_node call does not leak a handle created above.
    try:
        if not readonly:
            lockserver.lock_node(lockserver_handle, node_identifier)
            node_is_locked = True

        # Get a fresh node record from the database. It might have changed
        # before we obtained the lock.
        node = maindb.get_node(node_identifier)

        # The code beyond this point would be a good candidate for splitting
        # out into a few smaller functions for readability.

        donation_list = maindb.get_donations_from_node(node)
        if len(donation_list) == 0:
            _report_node_problem(
                node, "The node has no corresponding donation records. " +
                "Not marking node broken, though.")

        try:
            nodeinfo = nodemanager.get_node_info(node.last_known_ip,
                                                 node.last_known_port)
        except NodemanagerCommunicationError:
            _record_node_communication_failure(readonly, node)
            _report_node_problem(node, "Can't communicate with node.")
            return

        try:
            nodekey_str = rsa_publickey_to_string(nodeinfo["nodekey"])
        except ValueError:
            _mark_node_broken(readonly, node)
            _report_node_problem(
                node, "Invalid nodekey: " + str(nodeinfo["nodekey"]))
            return

        # Check that the nodeid matches. If it doesn't, it probably means
        # seattle was reinstalled or there is a different system at that
        # address now.
        if node.node_identifier != nodekey_str:
            _mark_node_inactive(readonly, node)
            _report_node_problem(
                node, "Wrong node identifier, the node reports: " +
                str(nodeinfo["nodekey"]))
            # Not much more worth checking in this case.
            return

        # Check that the database thinks it knows the extra vessel name.
        if node.extra_vessel_name == "":
            _mark_node_broken(readonly, node)
            _report_node_problem(node, "No extra_vessel_name in the database.")
            # Not much more worth checking in this case.
            return

        # Check that a vessel by the name of extra_vessel_name exists on the
        # node.
        if node.extra_vessel_name not in nodeinfo["vessels"]:
            _mark_node_broken(readonly, node)
            _report_node_problem(
                node,
                "The extra_vessel_name in the database is a vessel name that doesn't exist on the node."
            )
            # Not much more worth checking in this case.
            return

        extravesselinfo = nodeinfo["vessels"][node.extra_vessel_name]
        vessels_in_db = maindb.get_vessels_on_node(node)

        if len(extravesselinfo["userkeys"]) != 1:
            _mark_node_broken(readonly, node)
            _report_node_problem(
                node, "The extra vessel '" + node.extra_vessel_name +
                "' doesn't have 1 user key, it has " +
                str(len(extravesselinfo["userkeys"])))
        else:
            # Figure out which state the node is in according to the state
            # key. If several state names share the key, the last one
            # iterated wins.
            state_key = extravesselinfo["userkeys"][0]
            recognized_state_name = ""
            for statename in statekeys:
                if statekeys[statename] == state_key:
                    recognized_state_name = statename

            if not recognized_state_name:
                _mark_node_broken(readonly, node)
                _report_node_problem(
                    node, "The extra vessel '" + node.extra_vessel_name +
                    "' doesn't have a recognized user/state key")

            # States in which we expect to have vessels for the node in the
            # database.
            in_vessel_state = recognized_state_name in (
                "onepercentmanyevents", "twopercent")

            if len(vessels_in_db) == 0:
                if in_vessel_state:
                    # We don't mark it as broken because it may be in
                    # transition by a transition script away from
                    # onepercentmanyevents. If the vessels in the db have been
                    # deleted first but the state key hasn't been changed yet,
                    # we might hit this. Also, it's not so bad to have it not
                    # be marked as broken when it's like this, as it has no
                    # vessels we know about, anyways, so we're not going to be
                    # giving questionable resources to users because of it.
                    _report_node_problem(
                        node, "The node is in the " + recognized_state_name +
                        " state " +
                        "but we don't have any vessels for it in the database."
                    )
            else:
                if not in_vessel_state:
                    # We don't mark it as broken because it may be in
                    # transition by a transition script. Also, we may have
                    # other states in the future besides onepercentmanyevents
                    # that have vessels. We don't want to make all of those
                    # nodes inactive if it's just an issue of someone forgot
                    # to update this script.
                    _report_node_problem(
                        node, "The node is in the '" + recognized_state_name +
                        "' state but we have vessels for it in the database.")

        known_vessel_names = [vessel.name for vessel in vessels_in_db]

        # Look for vessels on the node with our node ownerkey which aren't in
        # our database.
        for actualvesselname in nodeinfo["vessels"]:
            vessel_ownerkey = nodeinfo["vessels"][actualvesselname]["ownerkey"]

            try:
                vessel_ownerkey_str = rsa_publickey_to_string(vessel_ownerkey)
            except ValueError:
                # At this point we aren't sure it's our node, but let's assume
                # that if there's an invalid key then the node is broken,
                # period.
                _mark_node_broken(readonly, node)
                _report_node_problem(
                    node, "Invalid vessel ownerkey: " + str(vessel_ownerkey))
                return

            if vessel_ownerkey_str == node.owner_pubkey:
                if actualvesselname not in known_vessel_names and actualvesselname != node.extra_vessel_name:
                    _mark_node_broken(readonly, node)
                    _report_node_problem(
                        node, "The vessel '" + actualvesselname +
                        "' exists on the node " +
                        "with the ownerkey for the node, but it's not in our vessels table."
                    )

        # Do some checking on each vessel we have in our database.
        for vessel in vessels_in_db:

            # Check that the vessel in our database actually exists on the
            # node.
            if vessel.name not in nodeinfo["vessels"]:
                _mark_node_broken(readonly, node)
                _report_node_problem(
                    node, "The vessel '" + vessel.name +
                    "' in our db doesn't exist on the node.")
                continue

            vesselinfo = nodeinfo["vessels"][vessel.name]

            try:
                vessel_ownerkey_str = rsa_publickey_to_string(
                    vesselinfo["ownerkey"])
            except ValueError:
                _mark_node_broken(readonly, node)
                # Report the key of the vessel being checked here, not the
                # variable left over from the loop over the node's vessels.
                _report_node_problem(
                    node, "Invalid vessel ownerkey on a vessel in our db: " +
                    str(vesselinfo["ownerkey"]))
                return

            # Check that the owner key for the vessel is what we have for the
            # node's owner key in our database.
            if node.owner_pubkey != vessel_ownerkey_str:
                _mark_node_broken(readonly, node)
                _report_node_problem(
                    node, "The vessel '" + vessel.name +
                    "' doesn't have the ownerkey we use for the node.")

            if not vesselinfo["advertise"]:
                _mark_node_broken(readonly, node)
                _report_node_problem(
                    node, "The vessel '" + vessel.name + "' isn't advertising.")

            # We're only concerned with non-dirty vessels as the backend
            # daemon should be working on cleaning up dirty vessels.
            if not vessel.is_dirty:
                # Check that the user keys that have access are the ones that
                # should have access.
                users_with_access = maindb.get_users_with_access_to_vessel(
                    vessel)

                if len(users_with_access) != len(vesselinfo["userkeys"]):
                    _release_vessel(readonly, vessel)
                    _report_node_problem(
                        node, "The vessel '" + vessel.name + "' reports " +
                        str(len(vesselinfo["userkeys"])) +
                        " user keys, but we expected " +
                        str(len(users_with_access)))

                for user in users_with_access:
                    if rsa_string_to_publickey(
                            user.user_pubkey) not in vesselinfo["userkeys"]:
                        _release_vessel(readonly, vessel)
                        _report_node_problem(
                            node, "The vessel '" + vessel.name +
                            "' doesn't have the userkey for user " +
                            user.username + ".")

    finally:
        # We only hold a lock if readonly was False and lock_node succeeded.
        if node_is_locked:
            lockserver.unlock_node(lockserver_handle, node_identifier)

        # Destroy the lockserver handle if we created it ourselves.
        if must_destroy_lockserver_handle:
            lockserver.destroy_lockserver_handle(lockserver_handle)