def handle(self, *args, **options):
    """
    Removes all proxied resources whose source node id is no longer
    listed in ``settings.PROXIED_NODES``.
    """
    # before starting, make sure to lock the storage so that any other
    # processes with heavy/frequent operations on the storage don't get
    # in our way.
    # NOTE: acquire the lock *before* entering the try block; otherwise a
    # failure in the Lock constructor would cause a NameError on `lock`
    # in the finally clause.
    lock = Lock('storage')
    lock.acquire()
    try:
        # collect current proxied node ids
        proxied_ids = list(settings.PROXIED_NODES)
        # iterate over proxy resources and check for each if its source
        # node id is still listed in the proxied node id list
        remove_count = 0
        for proxy_res in StorageObject.objects.filter(copy_status=PROXY):
            if proxy_res.source_node not in proxied_ids:
                # delete the associated resource
                sys.stdout.write("\nremoving proxied resource {}\n" \
                    .format(proxy_res.identifier))
                LOGGER.info("removing from proxied node {} resource {}" \
                    .format(proxy_res.source_node, proxy_res.identifier))
                remove_count += 1
                remove_resource(proxy_res)
        sys.stdout.write("\n{} proxied resources removed\n" \
            .format(remove_count))
        LOGGER.info("A total of {} resources have been removed" \
            .format(remove_count))
    finally:
        lock.release()
def handle(self, *args, **options):
    """
    Removes all resources stemming from the nodes whose names are given
    as positional arguments.
    """
    # before starting, make sure to lock the storage so that any other
    # processes with heavy/frequent operations on the storage don't get
    # in our way.
    # NOTE: acquire the lock *before* entering the try block; otherwise a
    # failure in the Lock constructor would cause a NameError on `lock`
    # in the finally clause.
    lock = Lock('storage')
    lock.acquire()
    try:
        for node_name in args:
            LOGGER.info("checking node {}".format(node_name))
            remove_count = 0
            for res in StorageObject.objects.filter(source_node=node_name):
                remove_count += 1
                LOGGER.info("removing resource {}".format(res.identifier))
                remove_resource(res)
            LOGGER.info("removed {} resources of node {}" \
                .format(remove_count, node_name))
    finally:
        lock.release()
def handle(self, *args, **options):
    """
    Removes all proxied resources whose source URL is no longer listed
    in ``settings.PROXIED_NODES``, creating a ``RemovedObject`` for each
    so that the removal is propagated to other nodes on synchronization.
    """
    # collect current proxy urls
    proxy_urls = [settings.PROXIED_NODES[proxy]['URL']
                  for proxy in settings.PROXIED_NODES]
    # iterate over proxy resources and check for each if its source url
    # is still listed in the proxied node list
    remove_count = 0
    for proxy_res in StorageObject.objects.filter(copy_status=PROXY):
        if proxy_res.source_url not in proxy_urls:
            # delete the associated resource and create a RemovedObject
            # to let the other nodes know that the resource has been
            # removed when synchronizing
            sys.stdout.write("\nremoving proxied resource {}\n".format(
                proxy_res.identifier))
            remove_count += 1
            # objects.create() already persists the new row, so no
            # additional save() call is needed (the original called
            # save() a second time redundantly)
            RemovedObject.objects.create(identifier=proxy_res.identifier)
            remove_resource(proxy_res)
    sys.stdout.write("\n{} proxied resources removed\n".format(remove_count))
def test_views(self):
    """Checks that viewing resources in one session links them pairwise
    in the VIEW TogetherManager, that a second session increases the
    pair counts, that DOWNLOAD counts stay untouched, and that the
    statistics shrink when a resource is completely removed.
    """
    man = TogetherManager.getManager(Resource.VIEW)
    all_res = [self.res_1, self.res_2, self.res_3, self.res_4]
    # client 1 views all 4 resources; after the (i+1)-th view, every
    # resource seen so far appears together with the i other resources
    client_1 = Client()
    for num_seen, res in enumerate(all_res):
        response = client_1.get(res.get_absolute_url(), follow=True)
        self.assertEquals(200, response.status_code)
        for seen in all_res[:num_seen + 1]:
            self.assertEqual(num_seen, len(man.getTogetherList(seen, 0)))
        if num_seen == 1:
            self.assertEqual(1,
                man.getTogetherCount(self.res_1, self.res_2))
    # another client views 2 of the resources, counts are increased
    client_2 = Client()
    response = client_2.get(self.res_1.get_absolute_url(), follow=True)
    self.assertEquals(200, response.status_code)
    self.assertEqual(3, len(man.getTogetherList(self.res_1, 0)))
    response = client_2.get(self.res_2.get_absolute_url(), follow=True)
    self.assertEquals(200, response.status_code)
    self.assertEqual(3, len(man.getTogetherList(self.res_1, 0)))
    self.assertEqual(3, len(man.getTogetherList(self.res_2, 0)))
    # counts of res_1 and res_2 appearing together is increased
    self.assertEqual(2, man.getTogetherCount(self.res_1, self.res_2))
    # make sure that downloads are not touched
    man = TogetherManager.getManager(Resource.DOWNLOAD)
    self.assertEqual(0, len(man.getTogetherList(self.res_1, 0)))
    # make sure that statistics are updated when a resource is
    # completely removed
    for res in all_res:
        saveLRStats(res, UPDATE_STAT)
    self.assertEquals(9, len(LRStats.objects.all()))
    self.assertEquals(228, len(UsageStats.objects.all()))
    remove_resource(self.res_1.storage_object)
    self.assertEquals(7, len(LRStats.objects.all()))
    self.assertEquals(181, len(UsageStats.objects.all()))
def test_views(self):
    """Verifies the VIEW TogetherManager bookkeeping: one client viewing
    four resources links them pairwise, a second client bumps the pair
    counts, DOWNLOAD data remains empty, and removing a resource prunes
    the collected statistics.
    """
    together = TogetherManager.getManager(Resource.VIEW)
    resources = (self.res_1, self.res_2, self.res_3, self.res_4)
    # client 1 views all 4 resources; once i+1 resources were viewed in
    # this session, each of them is linked to the other i resources
    client_1 = Client()
    for idx, current in enumerate(resources):
        response = client_1.get(current.get_absolute_url(), follow=True)
        self.assertEquals(200, response.status_code)
        for viewed in resources[:idx + 1]:
            self.assertEqual(idx, len(together.getTogetherList(viewed, 0)))
        if idx == 1:
            self.assertEqual(1,
                together.getTogetherCount(self.res_1, self.res_2))
    # another client views 2 of the resources, counts are increased
    client_2 = Client()
    response = client_2.get(self.res_1.get_absolute_url(), follow=True)
    self.assertEquals(200, response.status_code)
    self.assertEqual(3, len(together.getTogetherList(self.res_1, 0)))
    response = client_2.get(self.res_2.get_absolute_url(), follow=True)
    self.assertEquals(200, response.status_code)
    self.assertEqual(3, len(together.getTogetherList(self.res_1, 0)))
    self.assertEqual(3, len(together.getTogetherList(self.res_2, 0)))
    # counts of res_1 and res_2 appearing together is increased
    self.assertEqual(2, together.getTogetherCount(self.res_1, self.res_2))
    # make sure that downloads are not touched
    downloads = TogetherManager.getManager(Resource.DOWNLOAD)
    self.assertEqual(0, len(downloads.getTogetherList(self.res_1, 0)))
    # make sure that statistics are updated when a resource is
    # completely removed
    for current in resources:
        saveLRStats(current, UPDATE_STAT)
    self.assertEquals(9, len(LRStats.objects.all()))
    self.assertEquals(219, len(UsageStats.objects.all()))
    remove_resource(self.res_1.storage_object)
    self.assertEquals(7, len(LRStats.objects.all()))
    self.assertEquals(163, len(UsageStats.objects.all()))
def sync_with_single_node(node_id, node, is_proxy, id_file=None):
    """
    Synchronizes this META-SHARE node with another META-SHARE node using
    the given node description.

    `node_id` is the unique key under which the node settings are stored
        in the dict
    `node` is a dict with synchronization settings for the node to
        synchronize with
    `is_proxy` must be True if this node is a proxy for the given nodes;
        it must be False if the given nodes are not proxied by this node
    `id_file` is an optional open file object to which the ids of added
        and updated resources are written
    """
    # login
    url = node['URL']
    user_name = node['USERNAME']
    password = node['PASSWORD']
    opener = login("{0}/login/".format(url), user_name, password)

    # create inventory url, adding all supported sync protocols as
    # query parameters
    inv_url = "{0}/sync/?{1}".format(url,
        "&".join("sync_protocol={}".format(_prot)
                 for _prot in settings.SYNC_PROTOCOLS))

    # get the inventory list
    remote_inventory = get_inventory(opener, inv_url)
    remote_inventory_count = len(remote_inventory)
    LOGGER.info("Remote node {} contains {} resources".format(
        node_id, remote_inventory_count))

    # create a dictionary of uuid's and digests of resources from the
    # local inventory that stem from the remote node
    local_inventory = {}
    for item in StorageObject.objects.filter(source_node=node_id):
        local_inventory[item.identifier] = item.digest_checksum
    local_inventory_count = len(local_inventory)
    LOGGER.info("Local node contains {} resources stemming from remote node {}".format(
        local_inventory_count, node_id))

    # create three lists:
    # 1. list of resources to be added - resources that exist in the
    #    remote inventory but not in the local
    # 2. list of resources to be updated - resources that exist in both
    #    inventories but the remote is different from the local
    # 3. list of resources to be removed - resources that exist in the
    #    local inventory but not in the remote
    resources_to_add = []
    resources_to_update = []
    for remote_res_id, remote_digest in remote_inventory.iteritems():
        if remote_res_id in local_inventory:
            # compare checksums; if they differ, the resource has to be
            # updated; if they match there is nothing to do
            if remote_digest != local_inventory[remote_res_id]:
                resources_to_update.append(remote_res_id)
            # remove the resource from the local inventory; what is left
            # in the local inventory after this loop are the resources
            # to delete
            del local_inventory[remote_res_id]
        else:
            # resource exists in the remote inventory but not in the
            # local; make sure that the remote node does not try to add
            # a resource for which we know that it stems from ANOTHER
            # node or OUR node
            try:
                local_so = StorageObject.objects.get(
                    identifier=remote_res_id)
                # an empty source_node marks a resource of this node
                source_node = local_so.source_node or 'LOCAL NODE'
                LOGGER.warn(
                    "Node {} wants to add resource {} that we already know from node {}".format(
                        node_id, remote_res_id, source_node))
            except ObjectDoesNotExist:
                resources_to_add.append(remote_res_id)
    # remaining local inventory resources are to delete
    resources_to_delete = local_inventory.keys()

    # print informative messages to the user
    resources_to_add_count = len(resources_to_add)
    resources_to_update_count = len(resources_to_update)
    resources_to_delete_count = len(resources_to_delete)
    LOGGER.info("{} resources will be added".format(resources_to_add_count))
    LOGGER.info("{} resources will be updated".format(resources_to_update_count))
    LOGGER.info("{} resources will be deleted".format(resources_to_delete_count))

    _copy_status = PROXY if is_proxy else REMOTE

    # add resources from remote inventory
    num_added = 0
    for res_id in resources_to_add:
        try:
            LOGGER.info("adding resource {0} from node {1}".format(
                res_id, node_id))
            res_obj = Command._get_remote_resource(
                res_id, remote_inventory[res_id], node_id, node, opener,
                _copy_status)
            if id_file is not None:
                id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"\
                    .format(res_obj.id, res_obj.storage_object.identifier))
            num_added += 1
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate
        except Exception:
            LOGGER.error("Error while adding resource {}".format(res_id),
                exc_info=True)

    # update resources from remote inventory
    num_updated = 0
    for res_id in resources_to_update:
        try:
            LOGGER.info("updating resource {0} from node {1}".format(
                res_id, node_id))
            res_obj = Command._get_remote_resource(
                res_id, remote_inventory[res_id], node_id, node, opener,
                _copy_status)
            if id_file is not None:
                id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"\
                    .format(res_obj.id, res_obj.storage_object.identifier))
                if remote_inventory[res_id] != \
                        res_obj.storage_object.digest_checksum:
                    id_file.write("Different digests!\n")
            num_updated += 1
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate
        except Exception:
            LOGGER.error("Error while updating resource {}".format(res_id),
                exc_info=True)

    # delete resources that have disappeared from the remote inventory
    num_deleted = 0
    for res_id in resources_to_delete:
        try:
            LOGGER.info("removing resource {0} from node {1}".format(
                res_id, node_id))
            _so_to_remove = StorageObject.objects.get(identifier=res_id)
            remove_resource(_so_to_remove)
            num_deleted += 1
        # narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate
        except Exception:
            LOGGER.error("Error while removing resource {}".format(res_id),
                exc_info=True)

    LOGGER.info("{} of {} resources successfully added." \
        .format(num_added, resources_to_add_count))
    LOGGER.info("{} of {} resources successfully updated." \
        .format(num_updated, resources_to_update_count))
    LOGGER.info("{} of {} resources successfully removed." \
        .format(num_deleted, resources_to_delete_count))
def sync_with_server(server, is_proxy, id_file=None):
    """
    Synchronizes this META-SHARE node with another META-SHARE node using
    the given server description.

    `server` is a dict with synchronization settings (URL, USERNAME,
        PASSWORD) for the node to synchronize with
    `is_proxy` must be True if this node is a proxy for the given node;
        it must be False if the given node is not proxied by this node
    `id_file` is an optional open file object to which the ids of added
        and updated resources are written
    """
    new_resources = []
    resources_to_update = []

    # Login
    url = server['URL']
    user_name = server['USERNAME']
    password = server['PASSWORD']
    opener = login("{0}/login/".format(url), user_name, password)

    # Get the inventory list.
    remote_inventory = get_inventory(opener, "{0}/sync/".format(url))

    # handle existing resources
    remote_inventory_existing = remote_inventory['existing']
    remote_inventory_count = len(remote_inventory_existing)
    sys.stdout.write("\nRemote node " + BOLD + url + RESET + " contains " \
        + BOLD + str(remote_inventory_count) + " resources.\n" + RESET)

    # Map local non-master identifiers to their digests; a dict gives
    # O(1) lookups instead of the former O(n) list scans per remote item.
    local_inventory = {}
    for item in StorageObject.objects.exclude(copy_status=MASTER):
        local_inventory[str(item.identifier)] = str(item.digest_checksum)
    local_inventory_count = len(local_inventory)
    sys.stdout.write("\nLocal node contains " + BOLD + str(local_inventory_count) \
        + " resources.\n" + RESET)

    # Create two lists:
    # 1. Containing items to be added - items that exist in the
    #    remote inventory and not in the local.
    # 2. Containing items to be updated - items that exist in both
    #    inventories but the remote is different from the local
    for item in remote_inventory_existing:
        item_id = item['id']
        if item_id not in local_inventory:
            new_resources.append(item)
        elif item['digest'] != local_inventory[item_id]:
            resources_to_update.append(item)

    # Print informative messages to the user
    new_resources_count = len(new_resources)
    resources_to_update_count = len(resources_to_update)
    if new_resources_count == 0 and resources_to_update_count == 0:
        sys.stdout.write("\nThere are no resources marked" +\
          " for updating!\n")
    else:
        # If there are resources to add or update
        sys.stdout.write("\n" + BOLD + \
          ("No" if new_resources_count == 0 \
          else str(new_resources_count)) + \
          " new resource" + ("" if new_resources_count == 1 else "s") \
          + RESET + " will be added to your repository.\n")
        sys.stdout.write("\n" + BOLD + \
          ("No" if resources_to_update_count == 0 \
          else str(resources_to_update_count)) + \
          " resource" + ("" if resources_to_update_count == 1 else "s") \
          + RESET + " will be updated in your repository.\n")
        sys.stdout.write("\nImporting and Indexing...\n")

    _copy_status = PROXY if is_proxy else REMOTE

    # Get the full xmls from remote inventory and update local inventory
    for resource in new_resources:
        # Get the json storage object and the actual metadata xml
        storage_json, resource_xml_string = \
          get_full_metadata(opener, "{0}/sync/{1}/metadata/" \
                    .format(url, resource['id']), resource['digest'])
        res_obj = update_resource(storage_json, resource_xml_string,
                          resource['digest'], _copy_status)
        if id_file is not None:
            id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"\
                .format(res_obj.id, res_obj.storage_object.identifier))

    for resource in resources_to_update:
        # Get the json storage object and the actual metadata xml
        storage_json, resource_xml_string = \
          get_full_metadata(opener, "{0}/sync/{1}/metadata/" \
                    .format(url, resource['id']), resource['digest'])
        res_obj = update_resource(storage_json, resource_xml_string,
                          resource['digest'], _copy_status)
        if id_file is not None:
            id_file.write("--->RESOURCE_ID:{0};STORAGE_IDENTIFIER:{1}\n"\
                .format(res_obj.id, res_obj.storage_object.identifier))
            if resource['digest'] != res_obj.storage_object.digest_checksum:
                id_file.write("Different digests!\n")

    sys.stdout.write("\n\n")

    # handle removed resources
    remote_inventory_removed = remote_inventory['removed']
    remote_inventory_removed_count = len(remote_inventory_removed)
    sys.stdout.write("\nRemote node " + BOLD + url + RESET + " lists " \
        + BOLD + str(remote_inventory_removed_count) + " resources as removed.\n" + RESET)
    removed_count = 0
    for removed_id in remote_inventory_removed:
        if removed_id in local_inventory:
            # remove resource from this node;
            # if it is a PROXY copy, also create a corresponding removed
            # object, so that the removal is propagated to other
            # META-SHARE Managing Nodes (aka. inner nodes)
            sys.stdout.write("\nRemoving id {}...\n".format(removed_id))
            removed_count += 1
            _so_to_remove = StorageObject.objects.get(identifier=removed_id)
            # BUGFIX: compare with `==`, not `is` -- identity comparison
            # of a value constant only works by interning accident
            if _so_to_remove.copy_status == PROXY:
                # objects.create() already persists the new row; no extra
                # save() call is needed
                RemovedObject.objects.create(identifier=removed_id)
            remove_resource(_so_to_remove)
    sys.stdout.write("\n{} resources removed\n".format(removed_count))