def update_bag(self):
    """
    Update a bag if necessary.

    This uses the Django signal pre_check_bag_flag to prepare collections,
    and then checks the AVUs 'metadata_dirty' and 'bag_modified' to determine
    whether to regenerate the metadata files and/or bag.

    This is a synchronous update. The call waits until the update is finished.
    """
    # local imports avoid circular-import problems between tasks/resource modules
    from hs_core.tasks import create_bag_by_irods
    from hs_core.hydroshare.resource import check_resource_type
    from hs_core.hydroshare.hs_bagit import create_bag_metadata_files

    # send signal for pre_check_bag_flag so the concrete resource class can
    # prepare itself (e.g. collections) before bagging
    resource_cls = check_resource_type(self.resource_type)
    pre_check_bag_flag.send(sender=resource_cls, resource=self)

    # read both flags up front; the signal handler above may have set them
    metadata_dirty = self.getAVU('metadata_dirty')
    bag_modified = self.getAVU('bag_modified')

    if metadata_dirty:  # automatically cast to Bool
        create_bag_metadata_files(self)
        self.setAVU('metadata_dirty', False)

    # the ticket system does synchronous bag creation.
    # async bag creation isn't supported.
    if bag_modified:  # automatically cast to Bool
        create_bag_by_irods(self.short_id)
        self.setAVU('bag_modified', False)
def update_metadata_files(self):
    """
    Bring resourcemetadata.xml and resourcemap.xml up to date.

    Consults the 'metadata_dirty' AVU first; the files are only rewritten
    when that flag is truthy, and the flag is cleared afterwards.
    """
    from hs_core.hydroshare.hs_bagit import create_bag_metadata_files

    # nothing to do unless the metadata has been flagged as stale
    if not self.getAVU('metadata_dirty'):
        return
    create_bag_metadata_files(self)
    self.setAVU('metadata_dirty', False)
def create_bag_by_irods(resource_id, create_zip=True):
    """Create a resource bag on iRODS side by running the bagit rule and ibun zip.

    This function runs as a celery task, invoked asynchronously so that it does not
    block the main web thread when it creates bags for very large files which will
    take some time.

    :param resource_id: the resource uuid that is used to look for the resource to
        create the bag for.
    :param create_zip: defaults to True, set to false to create bagit files without
        zipping
    :return: bag_url if bag creation operation succeeds or raise an exception if
        resource does not exist or any other issues that prevent bags from being
        created.
    """
    res = utils.get_resource_by_shortkey(resource_id)
    istorage = res.get_irods_storage()
    bag_path = res.bag_path
    metadata_dirty = res.getAVU('metadata_dirty')
    # a missing AVU (None) is treated as dirty so new resources always get files
    metadata_dirty = metadata_dirty is None or metadata_dirty
    # if metadata has been changed, then regenerate metadata xml files
    if metadata_dirty:
        create_bag_metadata_files(res)
    bag_modified = res.getAVU("bag_modified")
    # same None-means-modified convention as metadata_dirty above
    bag_modified = bag_modified is None or bag_modified
    if metadata_dirty or bag_modified:
        create_bagit_files_by_irods(res, istorage)
    # NOTE(review): flag is cleared before zipping succeeds — if zipup fails the
    # bag will not be retried automatically; confirm this is intentional
    res.setAVU("bag_modified", False)
    if create_zip:
        irods_bagit_input_path = res.get_irods_path(resource_id, prepend_short_id=False)
        # only proceed when the resource is not deleted potentially by another request
        # when being downloaded
        is_exist = istorage.exists(irods_bagit_input_path)
        if is_exist:
            try:
                # replace any stale bag zip before rebuilding it
                if istorage.exists(bag_path):
                    istorage.delete(bag_path)
                istorage.zipup(irods_bagit_input_path, bag_path)
                if res.raccess.published:
                    # compute checksum to meet DataONE distribution requirement
                    chksum = istorage.checksum(bag_path)
                    res.bag_checksum = chksum
                return res.bag_url
            except SessionException as ex:
                # re-raise with just the stderr payload for the caller
                raise SessionException(-1, '', ex.stderr)
        else:
            raise ObjectDoesNotExist(
                'Resource {} does not exist.'.format(resource_id))
def create_or_update_from_package(resource, term, **kwargs):
    """Create or update one model-package metadata element, then refresh bag metadata.

    Maps the package term name to the corresponding attribute on
    resource.metadata; updates the element if it already exists, otherwise
    creates it, and finally regenerates the bag metadata files.
    """
    term_to_attr = {
        'StressPeriod': 'stress_period',
        'GroundWaterFlow': 'ground_water_flow',
        'BoundaryCondition': 'boundary_condition',
        'ModelCalibration': 'model_calibration',
        'GeneralElements': 'general_elements',
        'GridDimensions': 'grid_dimensions',
        'StudyArea': 'study_area',
    }
    attr_name = term_to_attr[term.term]
    existing_element = getattr(resource.metadata, attr_name)
    if existing_element:
        resource.metadata.update_element(term.term, existing_element.id, **kwargs)
    else:
        resource.metadata.create_element(term.term, **kwargs)
    create_bag_metadata_files(resource)
def resource_modified(resource, by_user=None, overwrite_bag=True):
    """
    Set an AVU flag that forces the bag to be recreated before fetch.

    This indicates that some content of the bag has been edited. Also records
    the editing user (when resolvable), refreshes the updated timestamp, syncs
    the Django title with the metadata title, and touches the 'modified' date
    element.
    """
    user = None
    if by_user:
        if isinstance(by_user, User):
            user = by_user
        else:
            # by_user may be a username string; fall back to None if unknown
            try:
                user = User.objects.get(username=by_user)
            except User.DoesNotExist:
                user = None
    if user:
        resource.last_changed_by = user

    resource.updated = now().isoformat()
    # seems this is the best place to sync resource title with metadata title
    resource.title = resource.metadata.title.value
    resource.save()

    modified_dates = resource.metadata.dates.all().filter(type='modified')
    if modified_dates:
        resource.metadata.update_element('date', modified_dates[0].id)

    if overwrite_bag:
        create_bag_metadata_files(resource)

    # set bag_modified-true AVU pair for the modified resource in iRODS to indicate
    # the resource is modified for on-demand bagging.
    set_dirty_bag_flag(resource)
def test_create_bag_files(self):
    """Verify that create_bag_metadata_files returns the IrodsStorage it used."""
    # this is the api call we are testing
    irods_storage_obj = hs_bagit.create_bag_metadata_files(self.test_res)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true"
    self.assertIsInstance(irods_storage_obj, IrodsStorage)
def check_bag(rid, options):
    """Inspect and optionally repair the metadata/bag state of one resource.

    Prints the existence of the resource's metadata files and bag zip plus the
    'metadata_dirty'/'bag_modified' AVUs, then acts on the requested options:
    reset / reset_metadata / reset_bag delete artifacts and mark them stale;
    generate / generate_metadata / generate_bag rebuild them (honoring
    options['if_needed']); download_bag / open_bag fetch the bag over HTTPS
    using options['login'] / options['password'].

    :param rid: short id of the resource to check.
    :param options: dict of command flags (see management command definition).
    """
    requests.packages.urllib3.disable_warnings()
    try:
        resource = BaseResource.objects.get(short_id=rid)
        istorage = resource.get_irods_storage()
        root_exists = istorage.exists(resource.root_path)
        if root_exists:
            # print status of metadata/bag system
            scimeta_path = os.path.join(resource.root_path, 'data',
                                        'resourcemetadata.xml')
            scimeta_exists = istorage.exists(scimeta_path)
            if scimeta_exists:
                print("resource metadata {} found".format(scimeta_path))
            else:
                print("resource metadata {} NOT FOUND".format(scimeta_path))
            resmap_path = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
            resmap_exists = istorage.exists(resmap_path)
            if resmap_exists:
                print("resource map {} found".format(resmap_path))
            else:
                print("resource map {} NOT FOUND".format(resmap_path))
            bag_exists = istorage.exists(resource.bag_path)
            if bag_exists:
                print("bag {} found".format(resource.bag_path))
            else:
                print("bag {} NOT FOUND".format(resource.bag_path))
            dirty = resource.getAVU('metadata_dirty')
            print("{}.metadata_dirty is {}".format(rid, str(dirty)))
            modified = resource.getAVU('bag_modified')
            print("{}.bag_modified is {}".format(rid, str(modified)))

            if options['reset']:  # reset all data to pristine
                resource.setAVU('metadata_dirty', 'true')
                print("{}.metadata_dirty set to true".format(rid))
                try:
                    istorage.delete(resource.scimeta_path)
                    print("{} deleted".format(resource.scimeta_path))
                except SessionException as ex:
                    print("{} delete failed: {}"
                          .format(resource.scimeta_path, ex.stderr))
                try:
                    istorage.delete(resource.resmap_path)
                    print("{} deleted".format(resource.resmap_path))
                except SessionException as ex:
                    print("{} delete failed: {}"
                          .format(resource.resmap_path, ex.stderr))
                resource.setAVU('bag_modified', 'true')
                print("{}.bag_modified set to true".format(rid))
                try:
                    istorage.delete(resource.bag_path)
                    print("{} deleted".format(resource.bag_path))
                except SessionException as ex:
                    print("{} delete failed: {}"
                          .format(resource.bag_path, ex.stderr))

            if options['reset_metadata']:
                resource.setAVU('metadata_dirty', 'true')
                print("{}.metadata_dirty set to true".format(rid))
                try:
                    istorage.delete(resource.scimeta_path)
                    print("{} deleted".format(resource.scimeta_path))
                except SessionException as ex:
                    print("delete of {} failed: {}"
                          .format(resource.scimeta_path, ex.stderr))
                try:
                    istorage.delete(resource.resmap_path)
                    print("{} deleted".format(resource.resmap_path))
                except SessionException as ex:
                    print("{} delete failed: {}"
                          .format(resource.resmap_path, ex.stderr))

            if options['reset_bag']:
                resource.setAVU('bag_modified', 'true')
                print("{}.bag_modified set to true".format(rid))
                try:
                    istorage.delete(resource.bag_path)
                    print("{} deleted".format(resource.bag_path))
                except SessionException as ex:
                    print("{} delete failed: {}"
                          .format(resource.bag_path, ex.stderr))

            if options['generate']:  # generate usable bag
                if not options['if_needed'] or dirty or not scimeta_exists \
                        or not resmap_exists:
                    try:
                        create_bag_metadata_files(resource)
                    except ValueError as e:
                        print(("{}: value error encountered: {}".format(rid, str(e))))
                        return
                    print("{} metadata generated from Django".format(rid))
                    resource.setAVU('metadata_dirty', 'false')
                    # regenerated metadata invalidates any existing bag zip
                    resource.setAVU('bag_modified', 'true')
                    print("{}.metadata_dirty set to false".format(rid))
                if not options['if_needed'] or modified or not bag_exists:
                    create_bag_by_irods(rid)
                    print("{} bag generated from iRODs".format(rid))
                    resource.setAVU('bag_modified', 'false')
                    print("{}.bag_modified set to false".format(rid))

            if options['generate_metadata']:
                if not options['if_needed'] or dirty or not scimeta_exists \
                        or not resmap_exists:
                    try:
                        create_bag_metadata_files(resource)
                    except ValueError as e:
                        print(("{}: value error encountered: {}".format(rid, str(e))))
                        return
                    print("{}: metadata generated from Django".format(rid))
                    resource.setAVU('metadata_dirty', 'false')
                    print("{}.metadata_dirty set to false".format(rid))
                    resource.setAVU('bag_modified', 'true')
                    # BUGFIX: message previously said "set to false" although the
                    # AVU is set to 'true' on the line above
                    print("{}.bag_modified set to true".format(rid))

            if options['generate_bag']:
                if not options['if_needed'] or modified or not bag_exists:
                    create_bag_by_irods(rid)
                    print("{}: bag generated from iRODs".format(rid))
                    resource.setAVU('bag_modified', 'false')
                    print("{}.bag_modified set to false".format(rid))

            if options['download_bag']:
                if options['password']:
                    server = getattr(settings, 'FQDN_OR_IP', 'www.hydroshare.org')
                    uri = "https://{}/hsapi/resource/{}/".format(server, rid)
                    print("download uri is {}".format(uri))
                    r = hs_requests.get(uri, verify=False, stream=True,
                                        auth=requests.auth.HTTPBasicAuth(
                                            options['login'], options['password']))
                    print("download return status is {}".format(str(r.status_code)))
                    print("redirects:")
                    for thing in r.history:
                        print("...url: {}".format(thing.url))
                    filename = 'tmp/check_bag_block'
                    with open(filename, 'wb') as fd:
                        for chunk in r.iter_content(chunk_size=128):
                            fd.write(chunk)
                else:
                    print("cannot download bag without username and password.")

            if options['open_bag']:
                if options['password']:
                    server = getattr(settings, 'FQDN_OR_IP', 'www.hydroshare.org')
                    uri = "https://{}/hsapi/resource/{}/".format(server, rid)
                    print("download uri is {}".format(uri))
                    r = hs_requests.get(uri, verify=False, stream=True,
                                        auth=requests.auth.HTTPBasicAuth(
                                            options['login'], options['password']))
                    print("download return status is {}".format(str(r.status_code)))
                    print("redirects:")
                    for thing in r.history:
                        print("...url: {}".format(thing.url))
                    filename = 'tmp/check_bag_block'
                    with open(filename, 'wb') as fd:
                        for chunk in r.iter_content(chunk_size=128):
                            fd.write(chunk)
                            # NOTE(review): only the first chunk is written here,
                            # unlike download_bag — presumably a cheap "can we open
                            # the stream" probe; confirm this is intentional
                            break
                else:
                    print("cannot open bag without username and password.")
        else:
            print("Resource with id {} does not exist in iRODS".format(rid))
    except BaseResource.DoesNotExist:
        print("Resource with id {} NOT FOUND in Django".format(rid))
def put(self, request, pk):
    """Update a resource's science metadata from an uploaded resourcemetadata.xml.

    Validates permission, the upload count, its content type, and its filename;
    then stages the file in a temporary fake-bag layout (alongside the existing
    resource map pulled from storage) so GenericResourceMeta can read and
    validate it, writes the metadata back to the resource, and regenerates the
    bag metadata files.

    :param request: DRF request carrying exactly one uploaded file.
    :param pk: short id of the resource to update.
    :return: 202 Response with the resource id on success.
    :raises PermissionDenied: when the user lacks edit permission.
    :raises ValidationError: on bad upload or metadata deserialization failure.
    """
    resource, authorized, user = view_utils.authorize(
        request, pk, needed_permission=ACTION_TO_AUTHORIZE.EDIT_RESOURCE,
        raises_exception=False)
    if not authorized:
        raise PermissionDenied()

    files = list(request.FILES.values())
    if len(files) == 0:
        error_msg = {'file': 'No resourcemetadata.xml file was found to update resource '
                             'metadata.'}
        raise ValidationError(detail=error_msg)
    elif len(files) > 1:
        error_msg = {'file': ('More than one file was found. Only one file, named '
                              'resourcemetadata.xml, '
                              'can be used to update resource metadata.')}
        raise ValidationError(detail=error_msg)

    scimeta = files[0]
    if scimeta.content_type not in self.ACCEPT_FORMATS:
        error_msg = {'file': ("Uploaded file has content type {t}, "
                              "but only these types are accepted: {e}.").format(
            t=scimeta.content_type, e=",".join(self.ACCEPT_FORMATS))}
        raise ValidationError(detail=error_msg)
    expect = 'resourcemetadata.xml'
    if scimeta.name != expect:
        error_msg = {'file': "Uploaded file has name {n}, but expected {e}.".format(
            n=scimeta.name, e=expect)}
        raise ValidationError(detail=error_msg)

    # Temp directory to store resourcemetadata.xml
    tmp_dir = tempfile.mkdtemp()
    try:
        # Fake the bag structure so that GenericResourceMeta.read_metadata_from_resource_bag
        # can read and validate the system and science metadata for us.
        bag_data_path = os.path.join(tmp_dir, 'data')
        os.mkdir(bag_data_path)

        # Copy new science metadata to bag data path
        scimeta_path = os.path.join(bag_data_path, 'resourcemetadata.xml')
        shutil.copy(scimeta.temporary_file_path(), scimeta_path)

        # Copy existing resource map to bag data path
        # (use a file-like object as the file may be in iRODS, so we can't
        # just copy it to a local path)
        resmeta_path = os.path.join(bag_data_path, 'resourcemap.xml')
        with open(resmeta_path, 'wb') as resmeta:
            storage = get_file_storage()
            resmeta_irods = storage.open(AbstractResource.sysmeta_path(pk))
            # BUGFIX: close the storage handle even if copyfileobj raises;
            # previously it leaked on error
            try:
                shutil.copyfileobj(resmeta_irods, resmeta)
            finally:
                resmeta_irods.close()

        try:
            # Read resource system and science metadata
            domain = Site.objects.get_current().domain
            rm = GenericResourceMeta.read_metadata_from_resource_bag(
                tmp_dir, hydroshare_host=domain)
            # Update resource metadata
            rm.write_metadata_to_resource(resource, update_title=True,
                                          update_keywords=True)
            create_bag_metadata_files(resource)
        except HsDeserializationDependencyException as e:
            # BUGFIX: corrected "depedent" typo in the logged/returned message
            msg = ("HsDeserializationDependencyException encountered when updating "
                   "science metadata for resource {pk}; dependent resource was {dep}.")
            msg = msg.format(pk=pk, dep=e.dependency_resource_id)
            logger.error(msg)
            raise ValidationError(detail=msg)
        except HsDeserializationException as e:
            raise ValidationError(detail=str(e))

        resource_modified(resource, request.user, overwrite_bag=False)
        return Response(data={'resource_id': pk}, status=status.HTTP_202_ACCEPTED)
    finally:
        # always remove the staging directory, even on validation failure
        shutil.rmtree(tmp_dir)