def set_progress(state, statusmsg, rusage, context):
    app.signature(
        "org.bccvl.tasks.plone.set_progress",
        kwargs={
            'state': state,
            'message': statusmsg,
            'rusage': rusage,
            'context': context,
        }).delay()

def import_cleanup_job(results_dir, context):
    return app.signature(
        "org.bccvl.tasks.plone.import_cleanup",
        kwargs={
            'path': results_dir,
            'context': context
        }, immutable=True)

def import_ala_job(items, results_dir, context):
    return app.signature(
        "org.bccvl.tasks.plone.import_ala",
        kwargs={
            'items': items,
            'results_dir': results_dir,
            'context': context
        }, immutable=True)

def set_progress_job(state, statusmsg, rusage, context):
    return app.signature(
        "org.bccvl.tasks.plone.set_progress",
        kwargs={
            'state': state,
            'message': statusmsg,
            'rusage': rusage,
            'context': context,
        }, immutable=True)

def import_multi_species_csv_job(url, results_dir, import_context, context):
    return app.signature(
        "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
        kwargs={
            'url': url,
            'results_dir': results_dir,
            'import_context': import_context,
            'context': context
        }, immutable=True)

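# Usage sketch (not part of the source): the *_job helpers above return
# immutable Celery signatures, so they can be composed with the | operator
# into a chain in which no task receives its predecessor's result.
# queue_ala_import is a hypothetical name; after_commit_task is the queueing
# helper used elsewhere in this section.
def queue_ala_import(items, results_dir, context):
    from org.bccvl.tasks.plone import after_commit_task
    chain = (import_ala_job(items, results_dir, context)
             | import_cleanup_job(results_dir, context)
             | set_progress_job('COMPLETED', 'ALA import finished',
                                None, context))
    # dispatch the whole chain once the current transaction commits
    after_commit_task(chain)
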
def update_metadata(self):
    uuid = self.request.form.get('uuid', None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context
        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }
        else:
            raise Exception("Invalid user")
        # build download url
        # 1. get context (site) relative path
        obj_url = obj.absolute_url()
        if obj.portal_type == 'org.bccvl.content.dataset':
            filename = obj.file.filename
            obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
        elif obj.portal_type == 'org.bccvl.content.remotedataset':
            filename = os.path.basename(obj.remoteUrl)
            obj_url = '{}/@@download/{}'.format(obj_url, filename)
        elif obj.portal_type == 'org.bccvl.content.multispeciesdataset':
            filename = obj.file.filename
            obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
        else:
            raise Exception("Wrong content type")

        from org.bccvl.tasks.celery import app
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': obj_url,
                'filename': filename,
                'contenttype': obj.format,
                'context': {
                    'context': '/'.join(obj.getPhysicalPath()),
                    'user': user,
                }
            }, immutable=True)

        from org.bccvl.tasks.plone import after_commit_task
        after_commit_task(update_task)
        # track background job state
        jt = IJobTracker(obj)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata',
                         function=obj.dataSource,
                         type=obj.portal_type)
        jt.set_progress('PENDING', 'Metadata update pending')
        return job.id
    except Exception as e:
        LOG.error('Caught exception %s', e)
        raise NotFound(self, 'update_metadata', self.request)

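# Hypothetical helper (not in the source): both update_metadata variants in
# this section repeat the same portal_type -> download-URL mapping; a sketch
# of how it could be factored out. build_download_url is an assumed name.
def build_download_url(obj):
    import os.path
    obj_url = obj.absolute_url()
    if obj.portal_type in ('org.bccvl.content.dataset',
                           'org.bccvl.content.multispeciesdataset'):
        # blob-backed datasets download via the @@download/file view
        filename = obj.file.filename
        return filename, '{}/@@download/file/{}'.format(obj_url, filename)
    if obj.portal_type == 'org.bccvl.content.remotedataset':
        # remote datasets derive the filename from the stored remote URL
        filename = os.path.basename(obj.remoteUrl)
        return filename, '{}/@@download/{}'.format(obj_url, filename)
    raise Exception("Wrong content type")
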
def export_result(self, serviceid):
    # self.context should be a result
    if not hasattr(self.context, 'job_params'):
        raise NotFound(self.context, self.context.title, self.request)
    # TODO: validate serviceid

    # start export job
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()

    # collect list of files to export:
    urllist = []
    for content in self.context.values():
        if content.portal_type not in ('org.bccvl.content.dataset',
                                       'org.bccvl.content.remotedataset'):
            # skip non datasets
            continue
        dlinfo = IDownloadInfo(content)
        urllist.append(dlinfo['url'])
    # add mets.xml
    urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
    # add prov.ttl
    urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))

    from org.bccvl.tasks.celery import app
    from org.bccvl.tasks.plone import after_commit_task
    # FIXME: Do mapping from serviceid to service type? based on interface
    #        background task will need serviceid and type, but it may resolve
    #        servicetype via API with serviceid
    export_task = app.signature(
        "org.bccvl.tasks.export_services.export_result",
        kwargs={
            'siteurl': api.portal.get().absolute_url(),
            'fileurls': urllist,
            'serviceid': serviceid,
            'context': {
                'context': context_path,
                'user': {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            }
        }, immutable=True)

    # queue job submission
    after_commit_task(export_task)
    # self.new_job('TODO: generate id', 'generate taskname: export_result')
    # self.set_progress('PENDING', u'Result export pending')
    status = 'info'
    message = (u'Export request for "{}" successfully submitted! Please check '
               u'the service and any associated email accounts to confirm the '
               u"data's availability").format(self.context.title)
    IStatusMessage(self.request).add(message, type=status)
    nexturl = self.request.get('HTTP_REFERER')
    if not nexturl:
        # this method should only be called on a result folder,
        # so we should be able to safely redirect back to the parent
        # experiment
        nexturl = self.context.__parent__.absolute_url()
    self.request.response.redirect(nexturl, 307)
    return (status, message)

def export_result(self, serviceid):
    # self.context should be a result
    if not hasattr(self.context, 'job_params'):
        raise NotFound(self.context, self.context.title, self.request)
    # TODO: validate serviceid

    # start export job
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()

    # collect list of files to export:
    urllist = []
    for content in self.context.values():
        if content.portal_type not in ('org.bccvl.content.dataset',
                                       'org.bccvl.content.remotedataset'):
            # skip non datasets
            continue
        dlinfo = IDownloadInfo(content)
        urllist.append(dlinfo['url'])
    # add mets.xml
    urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
    # add prov.ttl
    urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))
    # add experiment metadata
    urllist.append('{}/expmetadata.txt'.format(self.context.absolute_url()))

    from org.bccvl.tasks.celery import app
    from org.bccvl.tasks.plone import after_commit_task
    # FIXME: Do mapping from serviceid to service type? based on interface
    #        background task will need serviceid and type, but it may resolve
    #        servicetype via API with serviceid
    export_task = app.signature(
        "org.bccvl.tasks.export_services.export_result",
        kwargs={
            'siteurl': api.portal.get().absolute_url(),
            'fileurls': urllist,
            'serviceid': serviceid,
            'context': {
                'context': context_path,
                'user': {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            }
        }, immutable=True)

    # queue job submission
    after_commit_task(export_task)
    # self.new_job('TODO: generate id', 'generate taskname: export_result')
    # self.set_progress('PENDING', u'Result export pending')
    status = 'info'
    message = (u'Export request for "{}" successfully submitted! Please check '
               u'the service and any associated email accounts to confirm the '
               u"data's availability").format(self.context.title)
    IStatusMessage(self.request).add(message, type=status)
    nexturl = self.request.get('HTTP_REFERER')
    if not nexturl:
        # this method should only be called on a result folder,
        # so we should be able to safely redirect back to the parent
        # experiment
        nexturl = self.context.__parent__.absolute_url()
    self.request.response.redirect(nexturl, 307)
    return (status, message)

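# Hypothetical helper (not in the source): nearly every task signature in
# this section rebuilds the same {'context': ..., 'user': {...}} payload from
# the current member; this sketch shows one way to remove that repetition.
# build_task_context is an assumed name.
def build_task_context(member, context_path, **extra):
    context = {
        'context': context_path,
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        }
    }
    # allow callers to add fields like 'genre' or 'dataSource' (see add())
    context.update(extra)
    return context
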
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in the transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update)
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            }, immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            }, immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here.... we don't know which container
            #        we are in... and sshing here is bad as well....
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here..
            #       this really should be a background process
            #       best solution:
            #           user uploads to some temporary upload service (file
            #           never ends up here); we have a remote url here, and
            #           tell the datamover to pull it from there and move it
            #           to the final destination (or something like this)
            #       other good way:
            #           let user upload directly to swift (what about large
            #           file uploads?) and take care of clean up if necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_project_name': swiftopts.get('os_project_name'),
                        'os_storage_url': swiftopts.get('os_storage_url'),
                        'os_user_domain_name': swiftopts.get('os_user_domain_name'),
                        'os_project_domain_name': swiftopts.get('os_project_domain_name'),
                        'auth_version': swiftopts.get('auth_version')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()

def import_cleanup(results_dir, context):
    app.signature(
        "org.bccvl.tasks.plone.import_cleanup",
        kwargs={
            'path': results_dir,
            'context': context
        }).delay()

def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    # rdf commit should happen in the transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update)
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(container, self.request),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            }, immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: import_multi_species_csv')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            }, immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # 4. update task chain
            src_url = 'scp://{uid}@{ip}:{port}{file}'.format(
                uid=pwd.getpwuid(os.getuid()).pw_name,
                ip=get_hostname(self.request),
                port=os.environ.get('SSH_PORT', 22),
                file=tmpfile)
            dest_url = 'swift+{}'.format(new_object.remoteUrl)
            move_task = app.signature(
                'org.bccvl.tasks.datamover.tasks.move',
                kwargs={
                    'move_args': [(src_url, dest_url)],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                }, immutable=True)
            cleanup_task = app.signature(
                'org.bccvl.tasks.plone.import_cleanup',
                kwargs={
                    'path': os.path.dirname(tmpfile),
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                }, immutable=True)
            update_task = move_task | update_task | cleanup_task
            # need some more workflow states here to support e.g. zip file
            # upload (multiple rasters), give user a chance to better define
            # metadata
            # make sure update_metadata does not change user edited metadata
            # -> layer, unit, projection, whatever
            # FIXME: clean up tmp upload directory as well
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()

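# Explanatory sketch (not from the source): the `move_task | update_task |
# cleanup_task` expression above builds a Celery chain of immutable
# signatures, so each task runs for its side effects only and never receives
# the previous task's return value. The same pattern in miniature, using only
# Celery's public API; the broker URL and task names are placeholders.
def _chain_demo():
    from celery import Celery
    demo_app = Celery('demo', broker='memory://localhost/')

    @demo_app.task
    def step(label):
        # each step ignores any parent result because we use .si() below
        print('running step', label)

    # .si(...) builds an immutable signature, equivalent to
    # signature(..., immutable=True) as used throughout this section
    return step.si('move') | step.si('update_metadata') | step.si('cleanup')
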
def update_metadata(self):
    uuid = self.request.form.get("uuid", None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context
        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                "id": member.getUserName(),
                "email": member.getProperty("email"),
                "fullname": member.getProperty("fullname"),
            }
        else:
            raise Exception("Invalid user")
        # build download url
        # 1. get context (site) relative path
        obj_url = obj.absolute_url()
        if obj.portal_type == "org.bccvl.content.dataset":
            filename = obj.file.filename
            obj_url = "{}/@@download/file/{}".format(obj_url, filename)
        elif obj.portal_type == "org.bccvl.content.remotedataset":
            filename = os.path.basename(obj.remoteUrl)
            obj_url = "{}/@@download/{}".format(obj_url, filename)
        elif obj.portal_type == "org.bccvl.content.multispeciesdataset":
            filename = obj.file.filename
            obj_url = "{}/@@download/file/{}".format(obj_url, filename)
        else:
            raise Exception("Wrong content type")

        from org.bccvl.tasks.celery import app

        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                "url": obj_url,
                "filename": filename,
                "contenttype": obj.format,
                "context": {"context": "/".join(obj.getPhysicalPath()), "user": user},
            },
            immutable=True,
        )

        from org.bccvl.tasks.plone import after_commit_task

        after_commit_task(update_task)
        # track background job state
        jt = IJobTracker(obj)
        job = jt.new_job("TODO: generate id", "generate taskname: update_metadata")
        job.type = obj.portal_type
        jt.set_progress("PENDING", "Metadata update pending")
        return job.id
    except Exception as e:
        LOG.error("Caught exception %s", e)
        raise NotFound(self, "update_metadata", self.request)

def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in the transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update)
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            }, immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            }, immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here.... we don't know which container
            #        we are in... and sshing here is bad as well....
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here..
            #       this really should be a background process
            #       best solution:
            #           user uploads to some temporary upload service (file
            #           never ends up here); we have a remote url here, and
            #           tell the datamover to pull it from there and move it
            #           to the final destination (or something like this)
            #       other good way:
            #           let user upload directly to swift (what about large
            #           file uploads?) and take care of clean up if necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_tenant_name': swiftopts.get('os_tenant_name'),
                        'os_storage_url': swiftopts.get('os_storage_url')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()

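# Hypothetical helper (not in the source): all three add() variants above
# stash the uploaded blob with the same rename-then-copy fallback before
# handing it to the mover; a shared helper would keep that logic in one
# place. stash_upload is an assumed name.
def stash_upload(file):
    import os
    import shutil
    import tempfile
    tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
    tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
    blobf = file.open()
    try:
        # rename is cheap, but only works within the same filesystem
        os.rename(blobf.name, tmpfile)
    except OSError:
        # fall back to copying across filesystems
        shutil.copy(blobf.name, tmpfile)
    return tmpfile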