def handle(self, *args, **options):
    # Get the list of the servers to be queried
    core_nodes = CORE_NODES
    for server in core_nodes.values():
        new_resources = []
        resources_to_update = []
        local_inventory = []

        # Login
        url = server['URL']
        user_name = server['USERNAME']
        password = server['PASSWORD']
        opener = login("{0}/login/".format(url), user_name, password)

        # Get the inventory list.
        remote_inventory = get_inventory(opener, "{0}/sync/".format(url))
        remote_inventory_count = len(remote_inventory)
        sys.stdout.write("\nRemote node " + BOLD + url + RESET + " contains "
            + BOLD + str(remote_inventory_count) + " resources.\n" + RESET)

        # Get a list of uuid's and digests from the local inventory
        non_master_storage_objects = \
            StorageObject.objects.exclude(copy_status=MASTER)
        for item in non_master_storage_objects:
            local_inventory.append({'id': str(item.identifier),
                                    'digest': str(item.digest_checksum)})
        local_inventory_count = len(local_inventory)
        sys.stdout.write("\nLocal node contains " + BOLD
            + str(local_inventory_count) + " resources.\n" + RESET)

        # Create a list of ids to speed up matching
        local_inventory_indexed = []
        for item in local_inventory:
            local_inventory_indexed.append(item['id'])

        # Create two lists:
        # 1. Containing items to be added - items that exist in the
        #    remote inventory and not in the local.
        # 2. Containing items to be updated - items that exist in both
        #    inventories but the remote is different from the local
        for item in remote_inventory:
            item_id = item['id']
            if item_id not in local_inventory_indexed:
                new_resources.append(item)
            else:
                # Find the corresponding item in the local inventory
                # and compare digests
                for local_item in local_inventory:
                    if (item_id == local_item['id']) \
                            and not (item['digest'] == local_item['digest']):
                        resources_to_update.append(item)

        new_resources_count = len(new_resources)
        resources_to_update_count = len(resources_to_update)
        sys.stdout.write("\n " + BOLD + str(new_resources_count)
            + " new resources" + RESET
            + " will be added to your repository.\n")
        sys.stdout.write("\n " + BOLD + str(resources_to_update_count)
            + " resources" + RESET
            + " will be updated in your repository.\n")
        sys.stdout.write("\n Importing and Indexing...\n")

        for resource in new_resources:
            # Get the json storage object and the actual metadata xml
            storage_json, resource_xml_string = get_full_metadata(opener,
                "{0}/sync/{1}/metadata/".format(url, resource['id']),
                resource['digest'])
            update_resource(storage_json, resource_xml_string)

        for resource in resources_to_update:
            # Get the json storage object and the actual metadata xml
            storage_json, resource_xml_string = get_full_metadata(opener,
                "{0}/sync/{1}/metadata/".format(url, resource['id']),
                resource['digest'])
            update_resource(storage_json, resource_xml_string)
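# NOTE: illustrative sketch, not part of the original module. The loop in
# handle() above expects CORE_NODES to be a dict of dicts keyed by an
# arbitrary node id, where every value provides at least the 'URL',
# 'USERNAME' and 'PASSWORD' keys read above. A hypothetical settings entry
# might look like this (all values are made up):
_EXAMPLE_CORE_NODES = {
    'example-node': {
        'URL': 'http://metashare.example.org',
        'USERNAME': 'sync-user',
        'PASSWORD': 'secret',
    },
}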
def sync_with_single_node(node_id, node, is_proxy, id_file=None):
    """
    Synchronizes this META-SHARE node with another META-SHARE node using
    the given node description.

    `node_id` is the unique key under which the node settings are stored
      in the settings dict
    `node` is a dict with synchronization settings for the node to
      synchronize with
    `is_proxy` must be True if this node is a proxy for the given node;
      it must be False if the given node is not proxied by this node
    """
    # login
    url = node['URL']
    user_name = node['USERNAME']
    password = node['PASSWORD']
    opener = login("{0}/login/".format(url), user_name, password)

    # create inventory url
    inv_url = "{0}/sync/?".format(url)
    # add sync protocols to url
    for index, _prot in enumerate(settings.SYNC_PROTOCOLS):
        inv_url = inv_url + "sync_protocol={}".format(_prot)
        if index < len(settings.SYNC_PROTOCOLS) - 1:
            inv_url = inv_url + "&"

    # get the inventory list
    remote_inventory = get_inventory(opener, inv_url)
    remote_inventory_count = len(remote_inventory)
    LOGGER.info("Remote node {} contains {} resources".format(
        node_id, remote_inventory_count))

    # create a dictionary of uuid's and digests of resources from the
    # local inventory that stem from the remote node
    local_inventory = {}
    remote_storage_objects = StorageObject.objects.filter(
        source_node=node_id)
    for item in remote_storage_objects:
        local_inventory[item.identifier] = item.digest_checksum
    local_inventory_count = len(local_inventory)
    LOGGER.info("Local node contains {} resources stemming from remote "
        "node {}".format(local_inventory_count, node_id))

    # create three lists:
    # 1. list of resources to be added - resources that exist in the
    #    remote inventory but not in the local
    # 2. list of resources to be updated - resources that exist in both
    #    inventories but the remote is different from the local
    # 3. list of resources to be removed - resources that exist in the
    #    local inventory but not in the remote
    resources_to_add = []
    resources_to_update = []
    for remote_res_id, remote_digest in remote_inventory.iteritems():
        if remote_res_id in local_inventory:
            # compare checksums; if they differ, the resource has to be
            # updated; resources with the same checksum need nothing done
            if remote_digest != local_inventory[remote_res_id]:
                resources_to_update.append(remote_res_id)
            # remove the resource from the local inventory; what is left
            # in the local inventory after this loop are the resources
            # to delete
            del local_inventory[remote_res_id]
        else:
            # resource exists in the remote inventory but not in the
            # local; make sure that the remote node does not try to add a
            # resource for which we know that it stems from ANOTHER node
            # or OUR node
            try:
                local_so = StorageObject.objects.get(
                    identifier=remote_res_id)
                source_node = local_so.source_node
                if not source_node:
                    source_node = 'LOCAL NODE'
                LOGGER.warn("Node {} wants to add resource {} that we "
                    "already know from node {}".format(
                        node_id, remote_res_id, source_node))
            except ObjectDoesNotExist:
                resources_to_add.append(remote_res_id)
    # remaining local inventory resources are to delete
    resources_to_delete = local_inventory.keys()

    # print informative messages to the user
    resources_to_add_count = len(resources_to_add)
    resources_to_update_count = len(resources_to_update)
    resources_to_delete_count = len(resources_to_delete)
    LOGGER.info("{} resources will be added".format(resources_to_add_count))
    LOGGER.info("{} resources will be updated".format(
        resources_to_update_count))
    LOGGER.info("{} resources will be deleted".format(
        resources_to_delete_count))

    if is_proxy:
        _copy_status = PROXY
    else:
        _copy_status = REMOTE

    # add resources from the remote inventory
    num_added = 0
    for res_id in resources_to_add:
        try:
            LOGGER.info("adding resource {0} from node {1}".format(
                res_id, node_id))
            res_obj = Command._get_remote_resource(res_id,
                remote_inventory[res_id], node_id, node, opener,
                _copy_status)
            if id_file is not None:
                id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"
                    .format(res_obj.id, res_obj.storage_object.identifier))
            num_added += 1
        except:
            LOGGER.error("Error while adding resource {}".format(res_id),
                exc_info=True)

    # update resources from the remote inventory
    num_updated = 0
    for res_id in resources_to_update:
        try:
            LOGGER.info("updating resource {0} from node {1}".format(
                res_id, node_id))
            res_obj = Command._get_remote_resource(res_id,
                remote_inventory[res_id], node_id, node, opener,
                _copy_status)
            if id_file is not None:
                id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"
                    .format(res_obj.id, res_obj.storage_object.identifier))
                if remote_inventory[res_id] != \
                        res_obj.storage_object.digest_checksum:
                    id_file.write("Different digests!\n")
            num_updated += 1
        except:
            LOGGER.error("Error while updating resource {}".format(res_id),
                exc_info=True)

    # delete local resources that are no longer in the remote inventory
    num_deleted = 0
    for res_id in resources_to_delete:
        try:
            LOGGER.info("removing resource {0} from node {1}".format(
                res_id, node_id))
            _so_to_remove = StorageObject.objects.get(identifier=res_id)
            remove_resource(_so_to_remove)
            num_deleted += 1
        except:
            LOGGER.error("Error while removing resource {}".format(res_id),
                exc_info=True)

    LOGGER.info("{} of {} resources successfully added.".format(
        num_added, resources_to_add_count))
    LOGGER.info("{} of {} resources successfully updated.".format(
        num_updated, resources_to_update_count))
    LOGGER.info("{} of {} resources successfully removed.".format(
        num_deleted, resources_to_delete_count))
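# NOTE: illustrative sketch, not part of the original module. The manual
# query-string loop in sync_with_single_node() could equivalently be
# written with urllib.urlencode, which also percent-escapes the protocol
# names; this helper is a behavior-preserving rewrite of that loop and is
# not called anywhere:
def _build_inventory_url(url):
    import urllib
    return "{0}/sync/?{1}".format(url, urllib.urlencode(
        [('sync_protocol', _prot) for _prot in settings.SYNC_PROTOCOLS]))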
def sync_with_server(server, is_proxy, id_file=None):
    """
    Synchronizes this META-SHARE node with another META-SHARE node using
    the given server description.

    `server` is a dict with synchronization settings for the node to
      synchronize with
    `is_proxy` must be True if this node is a proxy for the given node;
      it must be False if the given node is not proxied by this node
    """
    new_resources = []
    resources_to_update = []
    local_inventory = []

    # Login
    url = server['URL']
    user_name = server['USERNAME']
    password = server['PASSWORD']
    opener = login("{0}/login/".format(url), user_name, password)

    # Get the inventory list.
    remote_inventory = get_inventory(opener, "{0}/sync/".format(url))

    # handle existing resources
    remote_inventory_existing = remote_inventory['existing']
    remote_inventory_count = len(remote_inventory_existing)
    sys.stdout.write("\nRemote node " + BOLD + url + RESET + " contains "
        + BOLD + str(remote_inventory_count) + " resources.\n" + RESET)

    # Get a list of uuid's and digests from the local inventory
    non_master_storage_objects = \
        StorageObject.objects.exclude(copy_status=MASTER)
    for item in non_master_storage_objects:
        local_inventory.append({'id': str(item.identifier),
                                'digest': str(item.digest_checksum)})
    local_inventory_count = len(local_inventory)
    sys.stdout.write("\nLocal node contains " + BOLD
        + str(local_inventory_count) + " resources.\n" + RESET)

    # Create a list of ids to speed up matching
    local_inventory_indexed = []
    for item in local_inventory:
        local_inventory_indexed.append(item['id'])

    # Create two lists:
    # 1. Containing items to be added - items that exist in the
    #    remote inventory and not in the local.
    # 2. Containing items to be updated - items that exist in both
    #    inventories but the remote is different from the local
    for item in remote_inventory_existing:
        item_id = item['id']
        if item_id not in local_inventory_indexed:
            new_resources.append(item)
        else:
            # Find the corresponding item in the local inventory
            # and compare digests
            for local_item in local_inventory:
                if (item_id == local_item['id']) \
                        and not (item['digest'] == local_item['digest']):
                    resources_to_update.append(item)

    # Print informative messages to the user
    new_resources_count = len(new_resources)
    resources_to_update_count = len(resources_to_update)
    if new_resources_count == 0 and resources_to_update_count == 0:
        sys.stdout.write("\nThere are no resources marked"
            " for updating!\n")
    else:
        # If there are resources to add or update
        sys.stdout.write("\n" + BOLD
            + ("No" if new_resources_count == 0
               else str(new_resources_count))
            + " new resource" + ("" if new_resources_count == 1 else "s")
            + RESET + " will be added to your repository.\n")
        sys.stdout.write("\n" + BOLD
            + ("No" if resources_to_update_count == 0
               else str(resources_to_update_count))
            + " resource" + ("" if resources_to_update_count == 1 else "s")
            + RESET + " will be updated in your repository.\n")
        sys.stdout.write("\nImporting and Indexing...\n")

        if is_proxy:
            _copy_status = PROXY
        else:
            _copy_status = REMOTE

        # Get the full xmls from the remote inventory and update the
        # local inventory
        for resource in new_resources:
            # Get the json storage object and the actual metadata xml
            storage_json, resource_xml_string = get_full_metadata(opener,
                "{0}/sync/{1}/metadata/".format(url, resource['id']),
                resource['digest'])
            res_obj = update_resource(storage_json, resource_xml_string,
                resource['digest'], _copy_status)
            if id_file is not None:
                id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"
                    .format(res_obj.id, res_obj.storage_object.identifier))

        for resource in resources_to_update:
            # Get the json storage object and the actual metadata xml
            storage_json, resource_xml_string = get_full_metadata(opener,
                "{0}/sync/{1}/metadata/".format(url, resource['id']),
                resource['digest'])
            res_obj = update_resource(storage_json, resource_xml_string,
                resource['digest'], _copy_status)
            if id_file is not None:
                id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"
                    .format(res_obj.id, res_obj.storage_object.identifier))
                if resource['digest'] != \
                        res_obj.storage_object.digest_checksum:
                    id_file.write("Different digests!\n")

        sys.stdout.write("\n\n")

    # handle removed resources
    remote_inventory_removed = remote_inventory['removed']
    remote_inventory_removed_count = len(remote_inventory_removed)
    sys.stdout.write("\nRemote node " + BOLD + url + RESET + " lists "
        + BOLD + str(remote_inventory_removed_count)
        + " resources as removed.\n" + RESET)
    removed_count = 0
    for removed_id in remote_inventory_removed:
        if removed_id in local_inventory_indexed:
            # remove resource from this node;
            # if it is a PROXY copy, also create a corresponding removed
            # object, so that the removal is propagated to other
            # META-SHARE Managing Nodes (aka. inner nodes)
            sys.stdout.write("\nRemoving id {}...\n".format(removed_id))
            removed_count += 1
            _so_to_remove = StorageObject.objects.get(identifier=removed_id)
            if _so_to_remove.copy_status == PROXY:
                _rem_obj = RemovedObject.objects.create(
                    identifier=removed_id)
                _rem_obj.save()
            remove_resource(_so_to_remove)
    sys.stdout.write("\n{} resources removed\n".format(removed_count))
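# NOTE: illustrative sketch, not part of the original module. Unlike the
# flat inventory consumed by sync_with_nodes() below, sync_with_server()
# expects get_inventory() to return a dict with an 'existing' list of
# id/digest pairs and a 'removed' list of plain identifiers, along these
# lines (all values are made up):
_EXAMPLE_SERVER_INVENTORY = {
    'existing': [
        {'id': '00aa11bb22cc33dd44ee55ff66aa77bb',
         'digest': 'd41d8cd98f00b204e9800998ecf8427e'},
    ],
    'removed': [
        '11bb22cc33dd44ee55ff66aa77bb88cc',
    ],
}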
def sync_with_nodes(nodes, is_proxy):
    """
    Synchronizes this META-SHARE node with the given other META-SHARE
    nodes.

    `nodes` is a dict of dicts with synchronization settings for the nodes
      to synchronize with
    `is_proxy` must be True if this node is a proxy for the given nodes;
      it must be False if the given nodes are not proxied by this node
    """
    for server in nodes.values():
        new_resources = []
        resources_to_update = []
        local_inventory = []

        # Login
        url = server['URL']
        user_name = server['USERNAME']
        password = server['PASSWORD']
        opener = login("{0}/login/".format(url), user_name, password)

        # Get the inventory list.
        remote_inventory = get_inventory(opener, "{0}/sync/".format(url))
        remote_inventory_count = len(remote_inventory)
        sys.stdout.write("\nRemote node " + BOLD + url + RESET + " contains "
            + BOLD + str(remote_inventory_count) + " resources.\n" + RESET)

        # Get a list of uuid's and digests from the local inventory
        non_master_storage_objects = \
            StorageObject.objects.exclude(copy_status=MASTER)
        for item in non_master_storage_objects:
            local_inventory.append({'id': str(item.identifier),
                                    'digest': str(item.digest_checksum)})
        local_inventory_count = len(local_inventory)
        sys.stdout.write("\nLocal node contains " + BOLD
            + str(local_inventory_count) + " resources.\n" + RESET)

        # Create a list of ids to speed up matching
        local_inventory_indexed = []
        for item in local_inventory:
            local_inventory_indexed.append(item['id'])

        # Create two lists:
        # 1. Containing items to be added - items that exist in the
        #    remote inventory and not in the local.
        # 2. Containing items to be updated - items that exist in both
        #    inventories but the remote is different from the local
        for item in remote_inventory:
            item_id = item['id']
            if item_id not in local_inventory_indexed:
                new_resources.append(item)
            else:
                # Find the corresponding item in the local inventory
                # and compare digests
                for local_item in local_inventory:
                    if (item_id == local_item['id']) \
                            and not (item['digest'] == local_item['digest']):
                        resources_to_update.append(item)

        # Print informative messages to the user
        new_resources_count = len(new_resources)
        resources_to_update_count = len(resources_to_update)
        if new_resources_count == 0 and resources_to_update_count == 0:
            sys.stdout.write("\nThere are no resources marked"
                " for updating!\n")
        else:
            # If there are resources to add or update
            sys.stdout.write("\n" + BOLD
                + ("No" if new_resources_count == 0
                   else str(new_resources_count))
                + " new resource" + ("" if new_resources_count == 1 else "s")
                + RESET + " will be added to your repository.\n")
            sys.stdout.write("\n" + BOLD
                + ("No" if resources_to_update_count == 0
                   else str(resources_to_update_count))
                + " resource"
                + ("" if resources_to_update_count == 1 else "s")
                + RESET + " will be updated in your repository.\n")
            sys.stdout.write("\nImporting and Indexing...\n")

            if is_proxy:
                _copy_status = PROXY
            else:
                _copy_status = REMOTE

            # Get the full xmls from the remote inventory and update the
            # local inventory
            for resource in new_resources:
                # Get the json storage object and the actual metadata xml
                storage_json, resource_xml_string = get_full_metadata(
                    opener,
                    "{0}/sync/{1}/metadata/".format(url, resource['id']),
                    resource['digest'])
                update_resource(storage_json, resource_xml_string,
                    resource['digest'], _copy_status)

            for resource in resources_to_update:
                # Get the json storage object and the actual metadata xml
                storage_json, resource_xml_string = get_full_metadata(
                    opener,
                    "{0}/sync/{1}/metadata/".format(url, resource['id']),
                    resource['digest'])
                update_resource(storage_json, resource_xml_string,
                    resource['digest'], _copy_status)

            sys.stdout.write("\n\n")
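# NOTE: illustrative usage sketch, not part of the original module. It
# assumes the node dicts live in the Django settings: CORE_NODES as used
# by handle() above, and PROXIED_NODES as a hypothetical counterpart
# holding the nodes this node proxies. The helper is never called here:
def _example_sync_all():
    from django.conf import settings as _settings
    sync_with_nodes(getattr(_settings, 'CORE_NODES', {}), is_proxy=False)
    sync_with_nodes(getattr(_settings, 'PROXIED_NODES', {}), is_proxy=True)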