def __createExpmetadata(self, job_params):
    # TODO: add other R package versions dynamically
    # Get experiment title
    self.md['Model specifications'] = {
        'Title': self.context.title,
        'Date/time run': str(self.context.creation_date),
        'Description': self.context.description or ''
    }

    # iterate over all input datasets and add them as entities
    self.md['Input datasets:'] = {}
    for key in ('species_occurrence_dataset', 'species_absence_dataset',
                'traits_dataset'):
        spmd = {}
        if key not in job_params:
            continue
        dsbrain = uuidToCatalogBrain(job_params[key])
        if not dsbrain:
            continue
        ds = dsbrain.getObject()
        mdata = IBCCVLMetadata(ds)
        if mdata and mdata.get('rows', None):
            spmd = {'Title': "{} ({})".format(ds.title, mdata.get('rows'))}
        else:
            spmd = {'Title': ds.title}
        info = IDownloadInfo(ds)
        spmd['Download URL'] = info['url']
        # walk up to the containing collection (or site root) for fallback
        # description / attribution
        coll = ds
        while not (ISiteRoot.providedBy(coll)
                   or ICollection.providedBy(coll)):
            coll = coll.__parent__
        spmd['Description'] = ds.description or coll.description or ''
        attribution = ds.attribution or getattr(coll, 'attribution', '') or ''
        if isinstance(attribution, list):
            attribution = '\n'.join([att.raw for att in attribution])
        spmd['Attribution'] = attribution
        self.md['Input datasets:'][key] = spmd

    key = 'traits_dataset_params'
    if key in job_params:
        self.md['Input datasets:'][key] = job_params.get(key, {})

    # pseudo-absence metadata
    key = u"pseudo_absence_dataset"
    pa_file = self.context.get('pseudo_absences.csv')
    pa_url = ""
    pa_title = ""
    if pa_file:
        pa_title = pa_file.title
        pa_url = pa_file.absolute_url()
        pa_url = '{}/@@download/{}'.format(pa_url, os.path.basename(pa_url))
    pamd = {
        'Title': pa_title,
        'Download URL': pa_url,
        'Pseudo-absence Strategy': job_params.get('pa_strategy', ''),
        'Pseudo-absence Ratio': str(job_params.get('pa_ratio', ''))
    }
    if job_params.get('pa_strategy', '') == 'disc':
        pamd['Minimum distance'] = str(job_params.get('pa_disk_min', ''))
        pamd['Maximum distance'] = str(job_params.get('pa_disk_max', ''))
    if job_params.get('pa_strategy', '') == 'sre':
        pamd['Quantile'] = str(job_params.get('pa_sre_quant', ''))
    self.md['Input datasets:'][key] = pamd

    for key in ['environmental_datasets', 'future_climate_datasets']:
        if key not in job_params:
            continue
        env_list = []
        layer_vocab = getUtility(IVocabularyFactory,
                                 'layer_source')(self.context)
        for uuid, layers in job_params[key].items():
            ds = uuidToObject(uuid)
            coll = ds
            while not (ISiteRoot.providedBy(coll)
                       or ICollection.providedBy(coll)):
                coll = coll.__parent__
            description = ds.description or coll.description
            attribution = (ds.attribution
                           or getattr(coll, 'attribution', '') or '')
            if isinstance(attribution, list):
                attribution = '\n'.join([att.raw for att in attribution])
            layer_titles = [layer_vocab.getLayerTitle(layer)
                            for layer in layers]
            env_list.append({
                'Title': ds.title,
                'Layers': u'\n'.join(layer_titles),
                'Description': description,
                'Attribution': attribution
            })
        self.md['Input datasets:'][key] = env_list

    key = "datasets"
    if key in job_params:
        dataset_list = []
        for uid in job_params[key]:
            dsbrain = uuidToCatalogBrain(uid)
            if dsbrain:
                ds = dsbrain.getObject()
                # get the source experiment
                source_exp = ds.__parent__
                while not IExperiment.providedBy(source_exp):
                    source_exp = source_exp.__parent__
                dataset_list.append({
                    'Source experiment': source_exp.title,
                    'Title': ds.title,
                    'Description': ds.description,
                    'Download URL': '{}/@@download/file/{}'.format(
                        ds.absolute_url(),
                        os.path.basename(ds.absolute_url())),
                    'Algorithm': ds.__parent__.job_params.get('function', ''),
                    'Species': IBCCVLMetadata(ds).get(
                        'species', {}).get('scientificName', ''),
                    'Resolution': IBCCVLMetadata(ds).get('resolution', '')
                })
        self.md['Input datasets:'][key] = dataset_list

    key = 'species_distribution_models'
    if key in job_params:
        dsbrain = uuidToCatalogBrain(job_params[key])
        if dsbrain:
            ds = dsbrain.getObject()
            # get the source experiment
            source_exp = ds.__parent__
            while not IExperiment.providedBy(source_exp):
                source_exp = source_exp.__parent__
            # get the threshold
            threshold = self.context.species_distribution_models.get(
                source_exp.UID(), {}).get(ds.UID())
            self.md['Input datasets:'][key] = {
                'Source experiment': source_exp.title,
                'Title': ds.title,
                'Description': ds.description,
                'Download URL': '{}/@@download/file/{}'.format(
                    ds.absolute_url(),
                    os.path.basename(ds.absolute_url())),
                'Algorithm': ds.__parent__.job_params.get('function', ''),
                'Species': IBCCVLMetadata(ds).get(
                    'species', {}).get('scientificName', ''),
                'Threshold': "{}({})".format(
                    threshold.get('label', ''),
                    str(threshold.get('value', '')))
            }

    key = 'projections'
    if key in job_params:
        for pds in job_params[key]:
            threshold = pds.get('threshold', {})
            dsbrain = uuidToCatalogBrain(pds.get('dataset'))
            if dsbrain:
                ds = dsbrain.getObject()
                # get the source experiment
                source_exp = ds.__parent__
                while not IExperiment.providedBy(source_exp):
                    source_exp = source_exp.__parent__
                # note: if more than one projection is supplied, only the
                # last one ends up in the metadata dict
                self.md['Input datasets:'][key] = {
                    'Source experiment': source_exp.title,
                    'Title': ds.title,
                    'Description': ds.description,
                    'Download URL': '{}/@@download/file/{}'.format(
                        ds.absolute_url(),
                        os.path.basename(ds.absolute_url())),
                    'Algorithm': ds.__parent__.job_params.get('function', ''),
                    'Species': IBCCVLMetadata(ds).get(
                        'species', {}).get('scientificName', ''),
                    'Threshold': "{}({})".format(
                        threshold.get('label', ''),
                        str(threshold.get('value', ''))),
                    'Biodiverse Cell size (m)': str(
                        job_params.get('cluster_size', ''))
                }

    # Projection experiment does not have algorithm as input
    if not IProjectionExperiment.providedBy(self.context.__parent__):
        for key in ['function', 'algorithm']:
            if key in job_params:
                self.md['Algorithm settings:'] = {
                    'Algorithm Name': job_params[key],
                    'Configuration options': self.__algoConfigOption(
                        job_params[key], job_params)
                }

    # Construct the text
    mdtext = StringIO.StringIO()
    for heading in ['BCCVL model outputs guide',
                    'System specifications',
                    'Model specifications',
                    'Input datasets:',
                    'Algorithm settings:',
                    'Model outputs:']:
        mdtext.write(self.__getMetadataText(heading, self.md))
    return mdtext.getvalue()
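# For orientation, a hand-written example of the self.md structure that
# __createExpmetadata builds up before it is rendered to text. The keys are
# the ones used above; all values are illustrative only, not real data.
#
#   self.md = {
#       'Model specifications': {
#           'Title': 'My SDM experiment',
#           'Date/time run': '2016-01-01 10:00:00',
#           'Description': '...'
#       },
#       'Input datasets:': {
#           'species_occurrence_dataset': {
#               'Title': 'Koala occurrences (1234)',
#               'Download URL': 'http://example.com/@@download/file/koala.csv',
#               'Description': '...',
#               'Attribution': '...'
#           },
#           'environmental_datasets': [
#               {'Title': '...', 'Layers': '...',
#                'Description': '...', 'Attribution': '...'},
#           ],
#       },
#       'Algorithm settings:': {
#           'Algorithm Name': 'bioclim',
#           'Configuration options': {'...': '...'}
#       }
#   }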
def export_to_ala(self):
    uuid = self.request.form.get('uuid', None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context

        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }
        else:
            raise Exception("Invalid user")

        # verify dataset
        if obj.portal_type not in ('org.bccvl.content.dataset',
                                   'org.bccvl.content.remotedataset',
                                   'org.bccvl.content.multispeciesdataset'):
            raise Exception("Invalid UUID (content type)")
        md = IBCCVLMetadata(obj)
        if md.get('genre') not in ('DataGenreSpeciesOccurrence',
                                   'DataGenreSpeciesCollection',
                                   'DataGenreTraits'):
            raise Exception("Invalid UUID (data type)")

        # get download url
        dlinfo = IDownloadInfo(obj)

        # download file
        from org.bccvl import movelib
        from org.bccvl.movelib.utils import build_source, build_destination
        import tempfile
        destdir = tempfile.mkdtemp(prefix='export_to_ala')
        try:
            from org.bccvl.tasks.celery import app
            settings = app.conf.get('bccvl', {})
            dest = os.path.join(destdir, os.path.basename(dlinfo['url']))
            movelib.move(build_source(dlinfo['url'], user['id'], settings),
                         build_destination('file://{}'.format(dest)))

            csvfile = None
            if dlinfo['contenttype'] == 'application/zip':
                # look at 'layers' to find the file within the zip
                arc = md['layers'].keys()[0]
                import zipfile
                zf = zipfile.ZipFile(dest, 'r')
                csvfile = zf.open(arc, 'r')
            else:
                csvfile = open(dest, 'rb')

            import requests
            # "Accept:application/json" "Origin:http://example.com"
            res = requests.post(settings['ala']['sandboxurl'],
                                files={'file': csvfile},
                                headers={
                                    'apikey': settings['ala']['apikey'],
                                    'Accept': 'application/json'
                                })
            if res.status_code != 200:
                self.record_error(res.reason, res.status_code)
                raise Exception('Upload failed')
            retval = res.json()
            # TODO: do error checking
            #       keys: sandboxUrl, fileName, message, error: Bool, fileId
            return retval
        finally:
            import shutil
            shutil.rmtree(destdir)
    except Exception as e:
        self.record_error(str(e), 500)
        raise
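# The TODO in export_to_ala leaves the sandbox response unchecked. A minimal
# sketch of what such a check could look like, assuming the keys listed in
# that comment (sandboxUrl, fileName, message, error, fileId); the helper
# name is illustrative only and not part of the existing API.
def _check_sandbox_response(self, retval):
    # the sandbox signals failures via an 'error' flag plus a 'message'
    if retval.get('error'):
        raise Exception(retval.get('message') or 'ALA sandbox upload failed')
    # a successful upload should at least carry a file id and a URL to visit
    for required in ('fileId', 'sandboxUrl'):
        if not retval.get(required):
            raise Exception(
                'Unexpected ALA sandbox response: missing {}'.format(required))
    return retval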
def demosdm(self):
    # Run SDM on a species given by lsid (from ALA), followed by a Climate
    # Change projection.
    lsid = self.request.form.get('lsid')
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        raise BadRequest('Request must be POST')

    # Swift params
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)

    # get parameters
    if not lsid:
        raise BadRequest('Required parameter lsid missing')
    # we have an lsid; we can't really verify it, but at least some data
    # is here

    # find rest of parameters
    # FIXME: hardcoded path to environmental datasets

    # Get the future climate for the climate change projection
    portal = ploneapi.portal.get()
    dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                       defaults.DATASETS_CLIMATE_FOLDER_ID,
                       'australia', 'australia_1km',
                       'RCP85_ukmo-hadgem1_2085.zip'])
    ds = portal.restrictedTraverse(dspath)
    dsuuid = IUUID(ds)
    dlinfo = IDownloadInfo(ds)
    dsmd = IBCCVLMetadata(ds)
    futureclimatelist = []
    for layer in ('B05', 'B06', 'B13', 'B14'):
        futureclimatelist.append({
            'uuid': dsuuid,
            'filename': dlinfo['filename'],
            'downloadurl': dlinfo['url'],
            'layer': layer,
            'type': dsmd['layers'][layer]['datatype'],
            'zippath': dsmd['layers'][layer]['filename']
        })
    # Climate change projection name
    cc_projection_name = os.path.splitext(dlinfo['filename'])[0]

    # Get the current climate for the SDM
    dspath = '/'.join([defaults.DATASETS_FOLDER_ID,
                       defaults.DATASETS_CLIMATE_FOLDER_ID,
                       'australia', 'australia_1km',
                       'current.76to05.zip'])
    ds = portal.restrictedTraverse(dspath)
    dsuuid = IUUID(ds)
    dlinfo = IDownloadInfo(ds)
    dsmd = IBCCVLMetadata(ds)
    envlist = []
    for layer in ('B05', 'B06', 'B13', 'B14'):
        envlist.append({
            'uuid': dsuuid,
            'filename': dlinfo['filename'],
            'downloadurl': dlinfo['url'],
            'layer': layer,
            'type': dsmd['layers'][layer]['datatype'],
            'zippath': dsmd['layers'][layer]['filename']
        })

    # FIXME: we don't use an IJobTracker here for now
    # get toolkit
    func = portal[defaults.TOOLKITS_FOLDER_ID]['demosdm']
    # build job_params
    job_params = {
        'resolution': IBCCVLMetadata(ds)['resolution'],
        'function': func.getId(),
        'species_occurrence_dataset': {
            'uuid': 'ala_occurrence_dataset',
            'species': u'demoSDM',
            'downloadurl': 'ala://ala?lsid={}'.format(lsid),
        },
        'environmental_datasets': envlist,
        'future_climate_datasets': futureclimatelist,
        'cc_projection_name': cc_projection_name
    }
    # add toolkit parameters (all default values)
    # get toolkit schema
    schema = loadString(func.schema).schema
    for name, field in getFields(schema).items():
        if field.default is not None:
            job_params[name] = field.default
    # add other default parameters
    job_params.update({
        'rescale_all_models': False,
        'selected_models': 'all',
        'modeling_id': 'bccvl',
    })
    # generate script to run
    script = u'\n'.join([
        resource_string('org.bccvl.compute', 'rscripts/bccvl.R'),
        resource_string('org.bccvl.compute', 'rscripts/eval.R'),
        func.script])
    # where to store results
    result = {
        'results_dir': 'swift+{}/wordpress/{}/'.format(
            swiftsettings.storage_url, urllib.quote_plus(lsid)),
        'outputs': json.loads(func.output)
    }
    # worker hints
    worker = {
        'script': {
            'name': '{}.R'.format(func.getId()),
            'script': script
        },
        'files': (
            'species_occurrence_dataset',
            'environmental_datasets',
            'future_climate_datasets'
        )
    }
    # put everything together
    jobdesc = {
        'env': {},
        'params': job_params,
        'worker': worker,
        'result': result,
    }
    # create job
    jobtool = getUtility(IJobUtility)
    job = jobtool.new_job(
        lsid=lsid,
        toolkit=IUUID(func),
        function=func.getId(),
        type='demosdm'
    )
    # create job context object
    member = ploneapi.user.get_current()
    context = {
        # we use the site object as context
        'context': '/'.join(portal.getPhysicalPath()),
        'jobid': job.id,
        'user': {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        },
    }
    # all set, build the task chain now
    from org.bccvl.tasks.compute import demo_task
    from org.bccvl.tasks.plone import after_commit_task, HIGH_PRIORITY
    after_commit_task(demo_task, HIGH_PRIORITY, jobdesc, context)
    # let's hope everything works; return result.
    # We don't create an experiment object, so stats are not counted
    # automatically; do it manually here.
    getUtility(IStatsUtility).count_experiment(
        user=member.getId(),
        portal_type='demosdm',
    )
    return {
        'state': os.path.join(result['results_dir'], 'state.json'),
        'result': os.path.join(result['results_dir'], 'proj_metadata.json'),
        'jobid': job.id
    }
def export_result(self, serviceid):
    # self.context should be a result folder
    if not hasattr(self.context, 'job_params'):
        raise NotFound(self.context, self.context.title, self.request)
    # TODO: validate serviceid

    # start export job
    context_path = '/'.join(self.context.getPhysicalPath())
    member = api.user.get_current()

    # collect list of files to export
    urllist = []
    for content in self.context.values():
        if content.portal_type not in ('org.bccvl.content.dataset',
                                       'org.bccvl.content.remotedataset'):
            # skip non datasets
            continue
        dlinfo = IDownloadInfo(content)
        urllist.append(dlinfo['url'])
    # add mets.xml
    urllist.append('{}/mets.xml'.format(self.context.absolute_url()))
    # add prov.ttl
    urllist.append('{}/prov.ttl'.format(self.context.absolute_url()))
    # add experiment metadata
    urllist.append('{}/expmetadata.txt'.format(self.context.absolute_url()))

    from org.bccvl.tasks.celery import app
    from org.bccvl.tasks.plone import after_commit_task
    # FIXME: Do mapping from serviceid to service type? based on interface.
    #        The background task will need serviceid and type, but it may
    #        resolve the servicetype via API with serviceid.
    export_task = app.signature(
        "org.bccvl.tasks.export_services.export_result",
        kwargs={
            'siteurl': api.portal.get().absolute_url(),
            'fileurls': urllist,
            'serviceid': serviceid,
            'context': {
                'context': context_path,
                'user': {
                    'id': member.getUserName(),
                    'email': member.getProperty('email'),
                    'fullname': member.getProperty('fullname')
                }
            }
        },
        immutable=True)

    # queue job submission
    after_commit_task(export_task)
    # self.new_job('TODO: generate id', 'generate taskname: export_result')
    # self.set_progress('PENDING', u'Result export pending')

    status = 'info'
    message = (u'Export request for "{}" successfully submitted! Please check '
               u'the service and any associated email accounts to confirm the '
               u'data\'s availability'.format(self.context.title))
    IStatusMessage(self.request).add(message, type=status)
    nexturl = self.request.get('HTTP-REFERER')
    if not nexturl:
        # this method should only be called on a result folder, so we should
        # be able to safely redirect back to the parent experiment
        nexturl = self.context.__parent__.absolute_url()
    self.request.response.redirect(nexturl, 307)
    return (status, message)
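# The FIXME in export_result leaves the serviceid-to-service-type mapping
# open. A minimal sketch of one way to resolve it locally, assuming a simple
# lookup table; the table contents and the helper name are illustrative only,
# not an existing API.
def _resolve_service_type(self, serviceid):
    # hypothetical mapping; real ids would come from the export services
    # configured for the site
    known_types = {
        'figshare': 'publication',
        'dropbox': 'cloudstorage',
    }
    # fall back to the raw serviceid so the background task can still try to
    # resolve it via its own API, as the FIXME suggests
    return known_types.get(serviceid, serviceid)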
def CatalogBrainDownloadInfo(brain):
    context = brain.getObject()
    return IDownloadInfo(context)
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know the full
    #       path of the object. We need the path to apply the
    #       transmogrifier chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update);
    # run transmogrify metadata extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(
                ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)

        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)

        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here... we don't know which container
            #        we are in, and sshing here is bad as well...
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)

            # TODO: we push the uploaded file directly to swift here...
            #       this really should be a background process.
            #       best solution: user uploads to some temporary upload
            #         service (file never ends up here); we have a remote url
            #         here, and tell the datamover to pull it from there and
            #         move it to the final destination (or something like
            #         this)
            #       other good way: let user upload directly to swift (what
            #         about large file uploads?) and take care of clean up if
            #         necessary

            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_tenant_name': swiftopts.get('os_tenant_name'),
                        'os_storage_url': swiftopts.get('os_storage_url')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)

        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def get_download_info(self, item=None):
    if item is None:
        item = self.context
    return IDownloadInfo(item)