def testprojection(result, toolkit):
    # submit test_job into queue
    member = api.user.get_current()
    params = {
        'result': {
            'results_dir': tempfile.mkdtemp(),
            'outputs': {
                'files': {
                    'proj_*.tif': {
                        'title': 'Future Projection',
                        'genre': 'DataGenreFP',
                        'mimetype': 'image/geotiff',
                    }
                }
            }
        },
    }
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: create chain with import task?
    # ... check what happens if task fails (e.g. remove 'experiment' in context)
    after_commit_task(testjob, params, context)
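
# Hedged sketch: after_commit_task() is used throughout these snippets but
# its body is not shown here. Judging by its name and call sites, it most
# likely defers queuing the background task until the Zope transaction has
# committed successfully. A minimal sketch of that pattern, assuming the
# task objects expose celery's apply_async(); this is illustrative, not the
# actual org.bccvl.tasks.plone implementation:
import transaction

def after_commit_task(task, *args):
    def hook(success, task=task, args=args):
        # the hook receives the commit status; only queue the job on success
        if success:
            task.apply_async(args=args)
    transaction.get().addAfterCommitHook(hook)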
def execute(result, func):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    params = get_project_params(result)
    script = generate_project_script()
    ### plone context for this job
    member = api.user.get_current()
    context = {
        "context": "/".join(result.getPhysicalPath()),
        "user": {
            "id": member.getUserName(),
            "email": member.getProperty("email"),
            "fullname": member.getProperty("fullname"),
        },
        "experiment": {
            "title": result.__parent__.title,
            "url": result.__parent__.absolute_url()
        },
    }
    ### add result infos
    params["result"] = {
        "results_dir": "scp://[email protected]" + tempfile.mkdtemp(),
        "outputs": OUTPUTS
    }
    params["worker"]["script"] = {"name": "projection.R", "script": script}
    # set debug flag
    params["worker"]["zipworkenv"] = api.env.debug_mode()
    after_commit_task(r_task, params, context)
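
# Hedged sketch: the OUTPUTS constant referenced above is not defined in
# this snippet; it is presumably a module-level mapping of output file
# patterns to dataset metadata. Modeled on the mappings the test jobs build
# inline (see testprojection above), it plausibly looks like this -- the
# entries are illustrative assumptions, not taken from this module:
OUTPUTS = {
    'files': {
        'proj_*.tif': {
            'title': 'Future Projection',
            'genre': 'DataGenreFP',
            'mimetype': 'image/geotiff',
        }
    }
}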
def testtraits(result, toolkit):
    # submit test_job into queue
    member = api.user.get_current()
    params = {
        'result': {
            'results_dir': tempfile.mkdtemp(),
            'outputs': {
                'files': {
                    "*.RData": {
                        "title": "R Species Traits Model object",
                        "genre": "DataGenreSTModel",
                        "mimetype": "application/x-r-data"
                    }
                }
            }
        },
    }
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: create chain with import task?
    # ... check what happens if task fails (e.g. remove 'experiment' in context)
    after_commit_task(testjob, params, context)
def execute(result, toolkit):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_traits_params(result)
    script = generate_traits_script(toolkit.script)
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: quick fix Decimal json encoding through celery (where is my
    #       custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, params, context)
def execute(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_traits_params(result)
    script = generate_traits_script(toolkit.script)
    # plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: quick fix Decimal json encoding through celery (where is my
    #       custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, priority, params, context)
def execute_sdm(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_toolkit_params(result)
    script = generate_sdm_script(toolkit.script)
    ###### generate plone context infos
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    ##### complete job infos
    params['result'] = {
        # FIXME: not optimal to access request this way
        #        rather pass in as parameter
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    # TODO: define job chain here (and in other methods as well)
    after_commit_task(r_task, priority, params, context)
def execute_sdm(result, toolkit):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    try:
        OUTPUTS = json.loads(toolkit.output)
    except (ValueError, TypeError) as e:
        LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
                  toolkit.getId(), e)
        OUTPUTS = {}
    params = get_toolkit_params(result)
    script = generate_sdm_script(toolkit.script)
    ###### generate plone context infos
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    ##### complete job infos
    params['result'] = {
        # FIXME: not optimal to access request this way
        #        rather pass in as parameter
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': '{}.R'.format(toolkit.getId()),
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    # TODO: define job chain here (and in other methods as well)
    after_commit_task(r_task, params, context)
def execute(result, toolkit, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
    # until ensemble becomes a content based toolkit, fall back to an
    # empty mapping so that OUTPUTS is defined below (the original code
    # would raise a NameError here)
    OUTPUTS = {}
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': OUTPUTS
    }
    params['worker']['script'] = {
        'name': 'ensemble.R',
        'script': script,
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    after_commit_task(r_task, priority, params, context)
def execute(result, toolkit):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
    # until ensemble becomes a content based toolkit, fall back to an
    # empty mapping so that OUTPUTS is defined below (the original code
    # would raise a NameError here)
    OUTPUTS = {}
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    params['result'] = {'results_dir': tempfile.mkdtemp(),
                        'outputs': OUTPUTS}
    params['worker']['script'] = {
        'name': 'ensemble.R',
        'script': script,
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    ### send job to queue
    after_commit_task(r_task, params, context)
def execute(result, func, priority=HIGH_PRIORITY):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    params = get_project_params(result)
    script = generate_project_script()
    ### plone context for this job
    member = api.user.get_current()
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    ### add result infos
    params['result'] = {
        'results_dir': get_results_dir(result, result.REQUEST),
        'outputs': get_output(result.job_params['function'])
    }
    params['worker']['script'] = {
        'name': 'projection.R',
        'script': script
    }
    # set debug flag
    params['worker']['zipworkenv'] = api.env.debug_mode()
    after_commit_task(r_task, priority, params, context)
def execute(result, toolkit):
    """
    This function takes an experiment and executes it.

    It uses the environment variables WORKER_DIR or HOME as the root
    folder to execute experiments. After the execution finishes, the
    output files will be attached to the experiment.

    :param experiment: The experiment holding the configuration and
        receiving the results
    :type experiment: org.bccvl.site.content.IExperiment
    """
    # FIXME: ensemble is not yet a content based toolkit
    # try:
    #     OUTPUTS = json.loads(toolkit.output)
    # except (ValueError, TypeError) as e:
    #     LOG.fatal("couldn't load OUTPUT from toolkit %s: %s",
    #               toolkit.getId(), e)
    #     OUTPUTS = {}
    # until ensemble becomes a content based toolkit, fall back to an
    # empty mapping so that OUTPUTS is defined below (the original code
    # would raise a NameError here)
    OUTPUTS = {}
    params = get_ensemble_params(result)
    script = generate_ensemble_script()
    member = api.user.get_current()
    context = {
        "context": "/".join(result.getPhysicalPath()),
        "user": {
            "id": member.getUserName(),
            "email": member.getProperty("email"),
            "fullname": member.getProperty("fullname"),
        },
        "experiment": {
            "title": result.__parent__.title,
            "url": result.__parent__.absolute_url()
        },
    }
    params["result"] = {
        "results_dir": "scp://[email protected]" + tempfile.mkdtemp(),
        "outputs": OUTPUTS
    }
    params["worker"]["script"] = {"name": "ensemble.R", "script": script}
    # set debug flag
    params["worker"]["zipworkenv"] = api.env.debug_mode()
    ### send job to queue
    after_commit_task(r_task, params, context)
def start_job(self):
    if self.is_active():
        return 'error', u'Current Job is still running'
    # The dataset object already exists and should have all required metadata
    md = IBCCVLMetadata(self.context)
    # TODO: this assumes we have an lsid in the metadata
    #       should check for it
    lsid = md['species']['taxonID']
    # we need site-path, context-path and lsid for this job
    # site_path = '/'.join(api.portal.get().getPhysicalPath())
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()
    # a folder for the datamover to place files in
    tmpdir = tempfile.mkdtemp()
    # ala_import will be submitted after commit, so we won't get a
    # result here
    ala_import_task = ala_import(
        lsid, tmpdir, {
            'context': context_path,
            'user': {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }
        })
    # TODO: add title, and url for dataset? (like with experiments?)
    # update provenance
    self._createProvenance(self.context)
    after_commit_task(ala_import_task)
    # FIXME: we don't have a backend task id here as it will be started
    #        after commit, when we shouldn't write anything to the db
    #        maybe add another callback to set task_id?
    self.new_job('TODO: generate id', 'generate taskname: ala_import')
    self.set_progress('PENDING', u'ALA import pending')
    return 'info', u'Job submitted {0} - {1}'.format(
        self.context.title, self.state)
def export_result(self, serviceid):
    # self.context should be a result
    if not hasattr(self.context, 'job_params'):
        raise NotFound(self.context, self.context.title, self.request)
    # TODO: validate serviceid
    # start export job
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()
    zipurl = self.context.absolute_url() + '/resultdownload'
    from org.bccvl.tasks.result_export import result_export
    from org.bccvl.tasks.plone import after_commit_task
    export_task = result_export(
        zipurl, serviceid,
        {'context': context_path,
         'user': {
             'id': member.getUserName(),
             'email': member.getProperty('email'),
             'fullname': member.getProperty('fullname')
         }})
    # queue job submission
    after_commit_task(export_task)
    # self.new_job('TODO: generate id', 'generate taskname: export_result')
    # self.set_progress('PENDING', u'Result export pending')
    status = 'info'
    message = (u'Export request for "{}" successfully submitted! Please '
               u'check the service and any associated email accounts to '
               u"confirm the data's availability".format(self.context.title))
    IStatusMessage(self.request).add(message, type=status)
    nexturl = self.request.get('HTTP-REFERER')
    if not nexturl:
        # this method should only be called on a result folder, so
        # we should be able to safely redirect back to the parent experiment
        nexturl = self.context.__parent__.absolute_url()
    self.request.response.redirect(nexturl, 307)
    return (status, message)
def testsdm(result, toolkit):
    # submit test_job into queue
    member = api.user.get_current()
    params = {
        'result': {
            'results_dir': tempfile.mkdtemp(),
            'outputs': {
                'files': {
                    '*.RData': {
                        'title': 'Test SDM Model',
                        'genre': 'DataGenreSDMModel',
                        'mimetype': 'application/x-r-data'
                    },
                    "proj_*.tif": {
                        "title": "Projection to current",
                        "genre": "DataGenreCP",
                        "mimetype": "image/geotiff"
                    },
                }
            }
        },
    }
    context = {
        'context': '/'.join(result.getPhysicalPath()),
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname'),
        },
        'experiment': {
            'title': result.__parent__.title,
            'url': result.__parent__.absolute_url()
        }
    }
    # TODO: create chain with import task?
    # ... check what happens if task fails (e.g. remove 'experiment' in context)
    after_commit_task(testjob, params, context)
def start_job(self):
    if self.is_active():
        return 'error', u'Current Job is still running'
    # The dataset object already exists and should have all required metadata
    md = IBCCVLMetadata(self.context)
    # TODO: this assumes we have an lsid in the metadata
    #       should check for it
    lsid = md['species']['taxonID']
    # we need site-path, context-path and lsid for this job
    # site_path = '/'.join(api.portal.get().getPhysicalPath())
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()
    # a folder for the datamover to place files in
    tmpdir = tempfile.mkdtemp()
    # ala_import will be submitted after commit, so we won't get a
    # result here
    ala_import_task = ala_import(
        lsid, tmpdir,
        {'context': context_path,
         'user': {
             'id': member.getUserName(),
             'email': member.getProperty('email'),
             'fullname': member.getProperty('fullname')
         }})
    # TODO: add title, and url for dataset? (like with experiments?)
    # update provenance
    self._createProvenance(self.context)
    after_commit_task(ala_import_task)
    # FIXME: we don't have a backend task id here as it will be started
    #        after commit, when we shouldn't write anything to the db
    #        maybe add another callback to set task_id?
    self.new_job('TODO: generate id', 'generate taskname: ala_import')
    self.set_progress('PENDING', u'ALA import pending')
    return 'info', u'Job submitted {0} - {1}'.format(self.context.title,
                                                     self.state)
def export_result(self, serviceid):
    # self.context should be a result
    if not hasattr(self.context, 'job_params'):
        raise NotFound(self.context, self.context.title, self.request)
    # TODO: validate serviceid
    # start export job
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()
    # collect list of files to export:
    urllist = []
    for content in self.context.values():
        if content.portal_type not in ('org.bccvl.content.dataset',
                                       'org.bccvl.content.remotedataset'):
            # skip non datasets
            continue
        dlinfo = IDownloadInfo(content)
        urllist.append(dlinfo['url'])
    # add mets.xml
    urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
    # add prov.ttl
    urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))
    from org.bccvl.tasks.celery import app
    from org.bccvl.tasks.plone import after_commit_task
    # FIXME: Do mapping from serviceid to service type? based on interface
    #        background task will need serviceid and type, but it may resolve
    #        servicetype via API with serviceid
    export_task = app.signature(
        "org.bccvl.tasks.export_services.export_result",
        kwargs={
            'siteurl': api.portal.get().absolute_url(),
            'fileurls': urllist,
            'serviceid': serviceid,
            'context': {
                'context': context_path,
                'user': {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            }
        },
        immutable=True)
    # queue job submission
    after_commit_task(export_task)
    # self.new_job('TODO: generate id', 'generate taskname: export_result')
    # self.set_progress('PENDING', u'Result export pending')
    status = 'info'
    message = (u'Export request for "{}" successfully submitted! Please '
               u'check the service and any associated email accounts to '
               u"confirm the data's availability".format(self.context.title))
    IStatusMessage(self.request).add(message, type=status)
    nexturl = self.request.get('HTTP-REFERER')
    if not nexturl:
        # this method should only be called on a result folder, so
        # we should be able to safely redirect back to the parent
        # experiment
        nexturl = self.context.__parent__.absolute_url()
    self.request.response.redirect(nexturl, 307)
    return (status, message)
def export_result(self, serviceid):
    # self.context should be a result
    if not hasattr(self.context, 'job_params'):
        raise NotFound(self.context, self.context.title, self.request)
    # TODO: validate serviceid
    # start export job
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()
    # collect list of files to export:
    urllist = []
    for content in self.context.values():
        if content.portal_type not in ('org.bccvl.content.dataset',
                                       'org.bccvl.content.remotedataset'):
            # skip non datasets
            continue
        dlinfo = IDownloadInfo(content)
        urllist.append(dlinfo['url'])
    # add mets.xml
    urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
    # add prov.ttl
    urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))
    # add experiment metadata
    urllist.append('{}/expmetadata.txt'.format(
        self.context.absolute_url()))
    from org.bccvl.tasks.celery import app
    from org.bccvl.tasks.plone import after_commit_task
    # FIXME: Do mapping from serviceid to service type? based on interface
    #        background task will need serviceid and type, but it may resolve
    #        servicetype via API with serviceid
    export_task = app.signature(
        "org.bccvl.tasks.export_services.export_result",
        kwargs={
            'siteurl': api.portal.get().absolute_url(),
            'fileurls': urllist,
            'serviceid': serviceid,
            'context': {
                'context': context_path,
                'user': {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            }
        },
        immutable=True)
    # queue job submission
    after_commit_task(export_task)
    # self.new_job('TODO: generate id', 'generate taskname: export_result')
    # self.set_progress('PENDING', u'Result export pending')
    status = 'info'
    message = (u'Export request for "{}" successfully submitted! Please '
               u'check the service and any associated email accounts to '
               u"confirm the data's availability".format(self.context.title))
    IStatusMessage(self.request).add(message, type=status)
    nexturl = self.request.get('HTTP-REFERER')
    if not nexturl:
        # this method should only be called on a result folder, so
        # we should be able to safely redirect back to the parent
        # experiment
        nexturl = self.context.__parent__.absolute_url()
    self.request.response.redirect(nexturl, 307)
    return (status, message)
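
# Hedged sketch: the export_result variants above queue the background job
# purely by name via app.signature("org.bccvl.tasks.export_services.export_result", ...),
# which implies a celery task registered under exactly that name on the
# worker side, roughly as below. The body is an illustrative stub under that
# assumption, not the real org.bccvl.tasks implementation:
from org.bccvl.tasks.celery import app

@app.task(name="org.bccvl.tasks.export_services.export_result")
def export_result(siteurl, fileurls, serviceid, context):
    # fetch each url in fileurls, hand the files to the service identified
    # by serviceid, and notify context['user'] about the outcome
    pass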
def update_metadata(self):
    uuid = self.request.form.get('uuid', None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context
        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }
        else:
            raise Exception("Invalid user")
        # build download url
        # 1. get context (site) relative path
        obj_url = obj.absolute_url()
        if obj.portal_type == 'org.bccvl.content.dataset':
            filename = obj.file.filename
            obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
        elif obj.portal_type == 'org.bccvl.content.remotedataset':
            filename = os.path.basename(obj.remoteUrl)
            obj_url = '{}/@@download/{}'.format(obj_url, filename)
        elif obj.portal_type == 'org.bccvl.content.multispeciesdataset':
            filename = obj.file.filename
            obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
        else:
            raise Exception("Wrong content type")
        from org.bccvl.tasks.celery import app
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': obj_url,
                'filename': filename,
                'contenttype': obj.format,
                'context': {
                    'context': '/'.join(obj.getPhysicalPath()),
                    'user': user,
                }
            },
            immutable=True)
        from org.bccvl.tasks.plone import after_commit_task
        after_commit_task(update_task)
        # track background job state
        jt = IJobTracker(obj)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata',
                         function=obj.dataSource,
                         type=obj.portal_type)
        jt.set_progress('PENDING', 'Metadata update pending')
        return job.id
    except Exception as e:
        LOG.error('Caught exception %s', e)
        raise NotFound(self, 'update_metadata', self.request)
def demosdm(self):
    lsid = self.request.form.get('lsid')
    # Run SDM on a species given by lsid (from ALA), followed by a Climate
    # Change projection.
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        raise BadRequest('Request must be POST')
    # Swift params
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    # get parameters
    if not lsid:
        raise BadRequest('Required parameter lsid missing')
    # we have an lsid ... we can't really verify it, but at least some
    # data is here
    # find rest of parameters
    # FIXME: hardcoded path to environmental datasets
    # Get the future climate for climate change projection
    portal = ploneapi.portal.get()
    dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                       defaults.DATASETS_CLIMATE_FOLDER_ID,
                       'australia', 'australia_1km',
                       'RCP85_ukmo-hadgem1_2085.zip'])
    ds = portal.restrictedTraverse(dspath)
    dsuuid = IUUID(ds)
    dlinfo = IDownloadInfo(ds)
    dsmd = IBCCVLMetadata(ds)
    futureclimatelist = []
    for layer in ('B05', 'B06', 'B13', 'B14'):
        futureclimatelist.append({
            'uuid': dsuuid,
            'filename': dlinfo['filename'],
            'downloadurl': dlinfo['url'],
            'layer': layer,
            'type': dsmd['layers'][layer]['datatype'],
            'zippath': dsmd['layers'][layer]['filename']
        })
    # Climate change projection name
    cc_projection_name = os.path.splitext(dlinfo['filename'])[0]
    # Get the current climate for SDM
    dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                       defaults.DATASETS_CLIMATE_FOLDER_ID,
                       'australia', 'australia_1km',
                       'current.76to05.zip'])
    ds = portal.restrictedTraverse(dspath)
    dsuuid = IUUID(ds)
    dlinfo = IDownloadInfo(ds)
    dsmd = IBCCVLMetadata(ds)
    envlist = []
    for layer in ('B05', 'B06', 'B13', 'B14'):
        envlist.append({
            'uuid': dsuuid,
            'filename': dlinfo['filename'],
            'downloadurl': dlinfo['url'],
            'layer': layer,
            'type': dsmd['layers'][layer]['datatype'],
            'zippath': dsmd['layers'][layer]['filename']
        })
    # FIXME: we don't use a IJobTracker here for now
    # get toolkit
    func = portal[defaults.TOOLKITS_FOLDER_ID]['demosdm']
    # build job_params:
    job_params = {
        'resolution': IBCCVLMetadata(ds)['resolution'],
        'function': func.getId(),
        'species_occurrence_dataset': {
            'uuid': 'ala_occurrence_dataset',
            'species': u'demoSDM',
            'downloadurl': 'ala://ala?lsid={}'.format(lsid),
        },
        'environmental_datasets': envlist,
        'future_climate_datasets': futureclimatelist,
        'cc_projection_name': cc_projection_name
    }
    # add toolkit parameters: (all default values)
    # get toolkit schema
    schema = loadString(func.schema).schema
    for name, field in getFields(schema).items():
        if field.default is not None:
            job_params[name] = field.default
    # add other default parameters
    job_params.update({
        'rescale_all_models': False,
        'selected_models': 'all',
        'modeling_id': 'bccvl',
    })
    # generate script to run
    script = u'\n'.join([
        resource_string('org.bccvl.compute', 'rscripts/bccvl.R'),
        resource_string('org.bccvl.compute', 'rscripts/eval.R'),
        func.script])
    # where to store results
    result = {
        'results_dir': 'swift+{}/wordpress/{}/'.format(
            swiftsettings.storage_url, urllib.quote_plus(lsid)),
        'outputs': json.loads(func.output)
    }
    # worker hints:
    worker = {
        'script': {
            'name': '{}.R'.format(func.getId()),
            'script': script
        },
        'files': (
            'species_occurrence_dataset',
            'environmental_datasets',
            'future_climate_datasets'
        )
    }
    # put everything together
    jobdesc = {
        'env': {},
        'params': job_params,
        'worker': worker,
        'result': result,
    }
    # create job
    jobtool = getUtility(IJobUtility)
    job = jobtool.new_job(
        lsid=lsid,
        toolkit=IUUID(func),
        function=func.getId(),
        type='demosdm'
    )
    # create job context object
    member = ploneapi.user.get_current()
    context = {
        # we use the site object as context
        'context': '/'.join(portal.getPhysicalPath()),
        'jobid': job.id,
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
    }
    # all set to go; build task chain now
    from org.bccvl.tasks.compute import demo_task
    from org.bccvl.tasks.plone import after_commit_task, HIGH_PRIORITY
    after_commit_task(demo_task, HIGH_PRIORITY, jobdesc, context)
    # let's hope everything works; return result.
    # We don't create an experiment object, so we don't count stats here;
    # let's do it manually
    getUtility(IStatsUtility).count_experiment(
        user=member.getId(),
        portal_type='demosdm',
    )
    return {
        'state': os.path.join(result['results_dir'], 'state.json'),
        'result': os.path.join(result['results_dir'], 'proj_metadata.json'),
        'jobid': job.id
    }
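
# Hedged sketch: demosdm() returns the location of a state.json inside the
# job's results_dir, which a caller could poll to track progress. The key
# name 'taskstate' and the terminal states below are assumptions about the
# state.json layout, which this code does not define; the sketch also
# assumes the results_dir is reachable over plain HTTP:
import json
import time
import urllib2  # the surrounding code is Python 2 (cf. urllib.quote_plus)

def wait_for_demosdm(state_url, interval=10):
    # poll state.json until the job reaches a terminal state
    while True:
        state = json.load(urllib2.urlopen(state_url))
        if state.get('taskstate') in ('COMPLETED', 'FAILED'):
            return state
        time.sleep(interval)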
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
    # start background import process (just a metadata update);
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(container, self.request),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: import_multi_species_csv')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(
                IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # There is an upload ... we have to make sure the uploaded
            # data ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # 4. update task chain
            src_url = 'scp://{uid}@{ip}:{port}{file}'.format(
                uid=pwd.getpwuid(os.getuid()).pw_name,
                ip=get_hostname(self.request),
                port=os.environ.get('SSH_PORT', 22),
                file=tmpfile)
            dest_url = 'swift+{}'.format(new_object.remoteUrl)
            move_task = app.signature(
                'org.bccvl.tasks.datamover.tasks.move',
                kwargs={
                    'move_args': [(src_url, dest_url)],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            cleanup_task = app.signature(
                'org.bccvl.tasks.plone.import_cleanup',
                kwargs={
                    'path': os.path.dirname(tmpfile),
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            update_task = move_task | update_task | cleanup_task
            # need some more workflow states here to support e.g. zip file
            # upload (multiple rasters); give the user a chance to better
            # define metadata
            # make sure update_metadata does not change user edited metadata
            # -> layer, unit, projection, whatever
            # FIXME: clean up tmp upload directory as well
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def update_metadata(self):
    uuid = self.request.form.get("uuid", None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context
        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                "id": member.getUserName(),
                "email": member.getProperty("email"),
                "fullname": member.getProperty("fullname"),
            }
        else:
            raise Exception("Invalid user")
        # build download url
        # 1. get context (site) relative path
        obj_url = obj.absolute_url()
        if obj.portal_type == "org.bccvl.content.dataset":
            filename = obj.file.filename
            obj_url = "{}/@@download/file/{}".format(obj_url, filename)
        elif obj.portal_type == "org.bccvl.content.remotedataset":
            filename = os.path.basename(obj.remoteUrl)
            obj_url = "{}/@@download/{}".format(obj_url, filename)
        elif obj.portal_type == "org.bccvl.content.multispeciesdataset":
            filename = obj.file.filename
            obj_url = "{}/@@download/file/{}".format(obj_url, filename)
        else:
            raise Exception("Wrong content type")
        from org.bccvl.tasks.celery import app
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                "url": obj_url,
                "filename": filename,
                "contenttype": obj.format,
                "context": {
                    "context": "/".join(obj.getPhysicalPath()),
                    "user": user
                },
            },
            immutable=True,
        )
        from org.bccvl.tasks.plone import after_commit_task
        after_commit_task(update_task)
        # track background job state
        jt = IJobTracker(obj)
        job = jt.new_job("TODO: generate id",
                         "generate taskname: update_metadata")
        job.type = obj.portal_type
        jt.set_progress("PENDING", "Metadata update pending")
        return job.id
    except Exception as e:
        LOG.error("Caught exception %s", e)
        raise NotFound(self, "update_metadata", self.request)
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
    # start background import process (just a metadata update);
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(
                IRegistry).forInterface(ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here ... we don't know which
            #        container we are in ... and sshing from here is bad
            #        as well ...
            # There is an upload ... we have to make sure the uploaded
            # data ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here ...
            #       this really should be a background process
            #       best solution: user uploads to some temporary upload
            #         service (file never ends up here); we have a remote
            #         url here, and tell the datamover to pull it from
            #         there and move it to the final destination (or
            #         something like this)
            #       other good way: let user upload directly to swift
            #         (what about large file uploads?) and take care of
            #         clean up if necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={'swift': {
                    'os_auth_url': swiftopts.get('os_auth_url'),
                    'os_username': swiftopts.get('os_username'),
                    'os_password': swiftopts.get('os_password'),
                    'os_tenant_name': swiftopts.get('os_tenant_name'),
                    'os_storage_url': swiftopts.get('os_storage_url')
                }})
            try:
                movelib.move(src_url, dest_url)
            except Exception:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know
    #       the full path of the object. We need the path to apply
    #       the transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)
    # start background import process (just a metadata update);
    # run transmogrify md extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(
                ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here ... we don't know which
            #        container we are in ... and sshing from here is bad
            #        as well ...
            # There is an upload ... we have to make sure the uploaded
            # data ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here ...
            #       this really should be a background process
            #       best solution: user uploads to some temporary upload
            #         service (file never ends up here); we have a remote
            #         url here, and tell the datamover to pull it from
            #         there and move it to the final destination (or
            #         something like this)
            #       other good way: let user upload directly to swift
            #         (what about large file uploads?) and take care of
            #         clean up if necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_project_name': swiftopts.get('os_project_name'),
                        'os_storage_url': swiftopts.get('os_storage_url'),
                        'os_user_domain_name': swiftopts.get('os_user_domain_name'),
                        'os_project_domain_name': swiftopts.get('os_project_domain_name'),
                        'auth_version': swiftopts.get('auth_version')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()