def handle_delete(self, action):
    title = self.context.Title()
    parent = aq_parent(aq_inner(self.context))
    # Objects to be deleted
    dsobjs = [self.context]
    # FIXME: would be faster to deal with brains here instead of objects
    if hasattr(self.context, "parts"):
        dsobjs += [uuidToObject(ds) for ds in self.context.parts]
    stats = getUtility(IStatsUtility)
    for context in dsobjs:
        # clear the file so the frontend Javascript treats it as removed
        if hasattr(context, "file"):
            context.file = None
        # FIXME: we should probably delete it instead of marking it as REMOVED
        jt = IJobTracker(context)
        if jt.state != 'REMOVED':
            jt.state = 'REMOVED'
        context.reindexObject()
        # collect stats
        stats.count_dataset(source=context.dataSource,
                            portal_type=context.portal_type,
                            state='REMOVED')
    IStatusMessage(self.request).add(
        u'{} has been removed.'.format(title))
    self.request.response.redirect(parent.absolute_url())
def job_progress(self, itemobj=None):
    if itemobj is None:
        itemobj = self.context
    progress = IJobTracker(itemobj.getObject()).progress()
    if progress:
        return progress.get('message')
    return None
def __iter__(self):
    for item in self.previous:
        keys = item.keys()
        pathkey = self.pathkey(*keys)[0]
        objpath = item[pathkey]
        obj = self.context.unrestrictedTraverse(objpath)
        # The constructor generated an empty object (which triggered our
        # stats event handler). Let's update the stats here manually.
        created = IAnnotations(obj.REQUEST).get(
            'org.bccvl.site.stats.created', False)
        if created:
            # our created subscriber has been invoked
            getUtility(IStatsUtility).count_dataset(
                source=obj.dataSource,
                portal_type=obj.portal_type)
            # reset the flag ... we can do that, because the pipeline runs
            # sequentially
            IAnnotations(
                obj.REQUEST)['org.bccvl.site.stats.created'] = False
        # Attach a job tracker only for species datasets from a multispecies
        # import
        if item.get('_partof', {}):
            jt = IJobTracker(obj)
            jt.new_job('TODO: generate id',
                       'generate taskname: ala_import',
                       function=obj.dataSource,
                       type=obj.portal_type,
                       state='COMPLETED')
        yield item
def set_progress(state, message, rusage, context, **kw):
    jobtool = getUtility(IJobUtility)
    if '_jobid' in kw:
        # TODO: should we do some security check here?
        #       e.g. only admin and the user who owns the job can update it?
        # TODO: jobid may not exist
        job = jobtool.get_job_by_id(kw['_jobid'])
    else:
        jt = IJobTracker(kw['_context'])
        job = jt.get_job()
    jobtool.set_progress(job, state, message, rusage)
    if state in ('COMPLETED', 'FAILED'):
        jobtool.set_state(job, state)
        LOG.info("Plone: Update job state %s", state)
        # FIXME: we should probably send emails in another place (or as an
        #        additional task in the chain?). There are too many things
        #        that can go wrong here, and this task is not allowed to
        #        fail (throw an exception), otherwise the user will never
        #        see a status update.
        # FIXME: there should be a better check here; we want to send email
        #        only for experiment results, not for dataset imports
        #        (i.e. ala)
        try:
            if 'experiment' in context:
                # check if this is the first or last result
                jt = IExperimentJobTracker(kw['_context'].__parent__)
                completed = [st for st in jt.states
                             if st[1] in ('COMPLETED', 'FAILED')]
                first = len(completed) == 1
                last = jt.state in ('COMPLETED', 'FAILED', 'FINISHED')
                if first or last:
                    # send email
                    fullname = context['user']['fullname']
                    email_addr = context['user']['email']
                    experiment_name = context['experiment']['title']
                    experiment_url = context['experiment']['url']
                    success = (job.state == 'COMPLETED')
                    if fullname and email_addr and experiment_name and experiment_url:
                        send_mail(fullname, email_addr, experiment_name,
                                  experiment_url, success)
                    else:
                        LOG.warn("Not sending email. Invalid parameters")
        except Exception as e:
            LOG.error(
                'Got an exception in plone.set_progress while trying to send an email: %s',
                e, exc_info=True)
    else:
        jobtool.set_state(job, state)
        LOG.info("Plone: Update job state RUNNING")
    if '_jobid' not in kw:
        kw['_context'].reindexObject()  # TODO: reindex job state only?
        # Compute the experiment run time if all its jobs are completed.
        # The experiment is the parent of the job context.
        jt = IExperimentJobTracker(kw['_context'].__parent__, None)
        if jt and jt.state in ('COMPLETED', 'FAILED'):
            exp = jt.context
            exp.runtime = time.time() - (exp.created().millis() / 1000.0)
    LOG.info("Plone: Update job progress: %s, %s, %s", state, message, context)
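# Illustrative only: a minimal sketch of the two dispatch paths in
# set_progress above. The payload keys ('_jobid', '_context') come from the
# code; the concrete values below are hypothetical.
def example_set_progress_calls(dataset, job_id):
    context = {'user': {'fullname': 'Jane Doe', 'email': 'jane@example.com'}}
    # Path 1: update by job id (no Plone object gets reindexed)
    set_progress('RUNNING', u'Half way there', None, context, _jobid=job_id)
    # Path 2: update via the content object; the tracker resolves the job,
    # and the object is reindexed afterwards
    set_progress('COMPLETED', u'Done', {'ru_utime': 1.2}, context,
                 _context=dataset)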
def handle_delete(self, action):
    title = self.context.Title()
    parent = aq_parent(aq_inner(self.context))
    # clear the file so the frontend Javascript treats it as removed
    if hasattr(self.context, "file"):
        self.context.file = None
    # FIXME: we should probably delete it instead of marking it as REMOVED
    jt = IJobTracker(self.context)
    jt.state = 'REMOVED'
    self.context.reindexObject()
    IStatusMessage(self.request).add(
        u'{} has been removed.'.format(title))
    self.request.response.redirect(parent.absolute_url())
def metadata(self):
    uuid = self.request.form.get('uuid')
    exp = uuidToObject(uuid)
    if not exp:
        self.record_error('Not Found', 404,
                          'Experiment not found',
                          {'parameter': 'uuid'})
        raise NotFound(self, 'metadata', self.request)
    # we found an experiment ... let's build the result
    retval = {
        'id': exp.id,
        'uuid': IUUID(exp),
        'title': exp.title,
        'description': exp.description,
        'job_state': IExperimentJobTracker(exp).state,
        'url': exp.absolute_url(),
        'results': []
    }
    # add info about parameters?
    # add info about results
    for result in exp.values():
        retval['results'].append({
            'id': result.id,
            'uuid': IUUID(result),
            'title': result.title,
            'description': result.description,
            'job_state': IJobTracker(result).state,
            'url': result.absolute_url()
        })
    return retval
def test_job_state_change(self):
    portal = self.layer['portal']
    job = self._create_new_job()
    content = portal['d1']
    job.content = IUUID(content)
    self.assertEqual(job.state, 'PENDING')
    job_tool = getUtility(IJobUtility)
    job_tool.reindex_job(job)
    # get job tracker for content
    jt = IJobTracker(content)
    # check if we get the same job
    self.assertEqual(job, jt.get_job())
    # progress the state
    jt.state = 'COMPLETED'
    self.assertEqual(job.state, 'COMPLETED')
    # but we can't move back
    jt.state = 'RUNNING'
    self.assertEqual(job.state, 'COMPLETED')
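# A minimal sketch (an assumption, not the actual IJobTracker implementation)
# of how the one-way transition verified by the test above could be enforced:
# states are ordered, and assignments that would move a job backwards are
# silently dropped.
STATE_ORDER = ['PENDING', 'QUEUED', 'RUNNING', 'COMPLETED', 'FAILED', 'REMOVED']

def monotonic_set_state(job, new_state):
    # only advance; setting 'RUNNING' after 'COMPLETED' is a no-op
    if STATE_ORDER.index(new_state) > STATE_ORDER.index(job.state):
        job.state = new_state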
def metadata(self):
    uuid = self.request.form.get('uuid')
    exp = uuidToObject(uuid)
    if not exp:
        self.record_error('Not Found', 404,
                          'Experiment not found',
                          {'parameter': 'uuid'})
        raise NotFound(self, 'metadata', self.request)
    # we found an experiment ... let's build the result
    retval = {
        'id': exp.id,
        'uuid': IUUID(exp),
        'title': exp.title,
        'description': exp.description,
        'job_state': IExperimentJobTracker(exp).state,
        'url': exp.absolute_url(),
        'results': []
    }
    # add info about parameters?
    # add info about results
    for result in exp.values():
        # Fetch the blob file contents for modelling_region/projection_region
        job_params = dict(**result.job_params)
        if 'modelling_region' in result.job_params:
            job_params['modelling_region'] = result.job_params[
                'modelling_region'].data
        if 'projection_region' in result.job_params:
            job_params['projection_region'] = result.job_params[
                'projection_region'].data
        retval['results'].append({
            'id': result.id,
            'uuid': IUUID(result),
            'title': result.title,
            'description': result.description,
            'job_state': IJobTracker(result).state,
            'params': job_params,
            'url': result.absolute_url()
        })
    return retval
def __call__(self, **kw):
    jt = IJobTracker(self.context)
    # TODO: if state is empty, check whether there is a downloadable file
    #       Yes: COMPLETED
    #       No:  FAILED
    try:
        state = jt.state
    except TypeError:
        return None
    if not state:
        if IBlobDataset.providedBy(self.context):
            # we have no state, which may happen for imported datasets;
            # let's check if we have a file
            if self.context.file is not None:
                state = 'COMPLETED'
            else:
                state = 'FAILED'
        elif IRemoteDataset.providedBy(self.context):
            if self.context.remoteUrl:
                state = 'COMPLETED'
            else:
                state = 'FAILED'
    return state
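# Illustrative only: the fallback above expressed as a small pure function,
# handy for unit testing the decision table. The boolean parameters are
# hypothetical stand-ins for the IBlobDataset.file / IRemoteDataset.remoteUrl
# checks in the code above.
def fallback_state(state, is_blob, has_file, is_remote, has_remote_url):
    if state:
        return state
    if is_blob:
        return 'COMPLETED' if has_file else 'FAILED'
    if is_remote:
        return 'COMPLETED' if has_remote_url else 'FAILED'
    return state  # unknown dataset type: leave state unset

assert fallback_state(None, True, True, False, False) == 'COMPLETED'
assert fallback_state(None, False, False, True, False) == 'FAILED'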
def test_upload_multi_csv_mac(self, mock_move=None):
    testcsv = resource_string(__name__, 'mock_data/multi_occurrence_mac.csv')
    view = self.getview()
    from ZPublisher.HTTPRequest import FileUpload
    from cgi import FieldStorage
    from StringIO import StringIO
    env = {'REQUEST_METHOD': 'PUT'}
    headers = {
        'content-type': 'text/csv',
        'content-length': str(len(testcsv)),
        'content-disposition': 'attachment; filename=test.csv'
    }
    fileupload = FileUpload(
        FieldStorage(fp=StringIO(testcsv), environ=env, headers=headers))
    view.request.form.update({
        'multispeciesoccurrence.buttons.save': u'Save',
        'multispeciesoccurrence.widgets.file': fileupload,
        'multispeciesoccurrence.widgets.title': u'test species title',
        'multispeciesoccurrence.widgets.description': u'some test.csv file',
        'multispeciesoccurrence.widgets.legalcheckbox': [u'selected'],
        'multispeciesoccurrence.widgets.legalcheckbox-empty-marker': u'1',
        'multispeciesoccurrence.widgets.rights': u'test rights',
    })
    _ = view()
    self.assertEqual(self.portal.REQUEST.response.status, 302)
    self.assertEqual(
        self.portal.REQUEST.response.getHeader('Location'),
        'http://{0}:{1}/plone/datasets'.format(self.layer.get('host'),
                                               self.layer.get('port')))
    ds = self.portal.datasets.species.user['test.csv']
    self.assertEqual(ds.rights, u'test rights')
    self.assertEqual(ds.file.data, testcsv)
    md = IBCCVLMetadata(ds)
    self.assertEqual(md['genre'], 'DataGenreSpeciesCollection')
    jt = IJobTracker(ds)
    self.assertEqual(jt.state, 'PENDING')

    # patch movelib.move
    def move_occurrence_data(*args, **kw):
        # try to move test.csv from dataset
        src, dst = (urlsplit(x['url']) for x in args)
        if src.scheme == 'http' and dst.scheme == 'file':
            # copy test.csv to dst
            dest_file = os.path.join(dst.path, 'somerandomfilename')
            shutil.copyfileobj(
                resource_stream(__name__,
                                'mock_data/multi_occurrence_mac.csv'),
                open(dest_file, 'w'))
        elif src.scheme == 'file' and dst.scheme == 'scp':
            # copy result back
            shutil.copyfile(src.path, dst.path)
        else:
            raise Exception('Data move failed')

    mock_move.side_effect = move_occurrence_data
    # trigger background process
    transaction.commit()
    # 5 moves should have happened
    self.assertEqual(mock_move.call_count, 5)
    self.assertEqual(
        mock_move.call_args_list[0][0][0]['url'],
        'http://{0}:{1}/plone/datasets/species/user/test.csv/@@download/file/test.csv'
        .format(self.layer.get('host'), self.layer.get('port')))
    # TODO: should test other call arguments as well
    # job state should be completed
    self.assertEqual(jt.state, 'COMPLETED')
    # metadata should be up to date
    self.assertEqual(md['rows'], 999)
    # check other datasets:
    for name, rows in (('abbreviata.csv', 65), ('acinacea.csv', 596),
                       ('acanthoclada.csv', 322), ('acanthaster.csv', 16)):
        self.assertIn(name, self.portal.datasets.species.user)
        tds = self.portal.datasets.species.user[name]
        tmd = IBCCVLMetadata(tds)
        self.assertEqual(tmd['rows'], rows)
    self.assertEqual(len(ds.parts), 4)
    self.assertEqual(len(ds.parts), len(set(ds.parts)))
def test_upload_zip(self, mock_move):
    # upload a zip in bccvl bagit format
    view = self.getview()
    from ZPublisher.HTTPRequest import FileUpload
    from cgi import FieldStorage
    from StringIO import StringIO
    data = resource_string(__name__, 'mock_data/spc_obl_merc.zip')
    env = {'REQUEST_METHOD': 'PUT'}
    headers = {
        'content-type': 'text/csv',
        'content-length': str(len(data)),
        'content-disposition': 'attachment; filename=spc_obl_merc.zip'
    }
    fileupload = FileUpload(
        FieldStorage(fp=StringIO(data), environ=env, headers=headers))
    view.request.form.update({
        'climatefuture.buttons.save': u'Save',
        'climatefuture.widgets.description': u'some test.tif file',
        'climatefuture.widgets.domain': u'Terrestrialdatasets',
        'climatefuture.widgets.file': fileupload,
        'climatefuture.widgets.title': u'test multi layer title',
        'climatefuture.widgets.legalcheckbox': [u'selected'],
        'climatefuture.widgets.legalcheckbox-empty-marker': u'1',
        'climatefuture.widgets.rights': u'test rights',
        'climatefuture.widgets.emsc': u'SRESB2',
        'climatefuture.widgets.gcm': u'cccma-cgcm31',
        'climatefuture.widgets.resolution': u'Resolution5m',
    })
    _ = view()
    self.assertEqual(self.portal.REQUEST.response.status, 302)
    self.assertEqual(
        self.portal.REQUEST.response.getHeader('Location'),
        'http://{0}:{1}/plone/datasets'.format(self.layer.get('host'),
                                               self.layer.get('port')))
    ds = self.portal.datasets.climate.user['spc_obl_merc.zip']
    self.assertEqual(ds.rights, u'test rights')
    self.assertEqual(ds.file.data, data)
    md = IBCCVLMetadata(ds)
    self.assertEqual(md['genre'], 'DataGenreFC')
    self.assertEqual(md['resolution'], u'Resolution5m')
    self.assertEqual(md['emsc'], u'SRESB2')
    self.assertEqual(md['gcm'], u'cccma-cgcm31')
    jt = IJobTracker(ds)
    self.assertEqual(jt.state, 'PENDING')

    # patch movelib.move
    def move_occurrence_data(*args, **kw):
        # try to move the zip file from the dataset
        src, dst = (urlsplit(x['url']) for x in args)
        # copy the zip file to dst
        shutil.copyfileobj(
            resource_stream(__name__, 'mock_data/spc_obl_merc.zip'),
            open(dst.path, 'w'))

    mock_move.side_effect = move_occurrence_data
    # trigger background process
    transaction.commit()
    # one move should have happened
    self.assertEqual(
        mock_move.call_args[0][0]['url'],
        'http://{0}:{1}/plone/datasets/climate/user/spc_obl_merc.zip/@@download/file/spc_obl_merc.zip'
        .format(self.layer.get('host'), self.layer.get('port')))
    # job state should be completed
    self.assertEqual(jt.state, 'COMPLETED')
    layermd = md['layers']['spc_obl_merc/data/spc_obl_merc_1.tif']
    self.assertEqual(layermd['filename'],
                     'spc_obl_merc/data/spc_obl_merc_1.tif')
    self.assertEqual(layermd['min'], 19.0)
    self.assertEqual(layermd['max'], 128.0)
    self.assertEqual(layermd['datatype'], 'continuous')
    self.assertEqual(layermd['height'], 200)
    self.assertEqual(layermd['width'], 200)
    self.assertEqual(layermd['srs'], None)
    layermd = md['layers']['spc_obl_merc/data/spc_obl_merc_2.tif']
    self.assertEqual(layermd['filename'],
                     'spc_obl_merc/data/spc_obl_merc_2.tif')
    self.assertEqual(layermd['min'], 19.0)
    self.assertEqual(layermd['max'], 128.0)
    self.assertEqual(layermd['datatype'], 'continuous')
    self.assertEqual(layermd['height'], 200)
    self.assertEqual(layermd['width'], 200)
    self.assertEqual(layermd['srs'], None)
def test_upload_occurrence(self, mock_move):
    testcsv = resource_string(__name__, 'mock_data/ala_occurrence.csv')
    view = self.getview()
    from ZPublisher.HTTPRequest import FileUpload
    from cgi import FieldStorage
    from StringIO import StringIO
    env = {'REQUEST_METHOD': 'PUT'}
    headers = {
        'content-type': 'text/csv',
        'content-length': str(len(testcsv)),
        'content-disposition': 'attachment; filename=test.csv'
    }
    fileupload = FileUpload(
        FieldStorage(fp=StringIO(testcsv), environ=env, headers=headers))
    view.request.form.update({
        'speciesoccurrence.buttons.save': u'Save',
        'speciesoccurrence.widgets.file': fileupload,
        'speciesoccurrence.widgets.title': u'test species title',
        'speciesoccurrence.widgets.description': u'some test.csv file',
        'speciesoccurrence.widgets.legalcheckbox': [u'selected'],
        'speciesoccurrence.widgets.legalcheckbox-empty-marker': u'1',
        'speciesoccurrence.widgets.rights': u'test rights',
        'speciesoccurrence.widgets.scientificName': u'test species',
        'speciesoccurrence.widgets.taxonID': u'test taxonid',
        'speciesoccurrence.widgets.vernacularName': u'test it'
    })
    _ = view()
    self.assertEqual(self.portal.REQUEST.response.status, 302)
    self.assertEqual(
        self.portal.REQUEST.response.getHeader('Location'),
        'http://{0}:{1}/plone/datasets'.format(self.layer.get('host'),
                                               self.layer.get('port')))
    # dataset should exist now
    ds = self.portal.datasets.species.user['test.csv']
    self.assertEqual(ds.rights, u'test rights')
    self.assertEqual(ds.file.data, testcsv)
    md = IBCCVLMetadata(ds)
    self.assertEqual(md['genre'], 'DataGenreSpeciesOccurrence')
    self.assertEqual(md['species']['taxonID'], u'test taxonid')
    self.assertEqual(md['species']['scientificName'], u'test species')
    self.assertEqual(md['species']['vernacularName'], u'test it')
    jt = IJobTracker(ds)
    self.assertEqual(jt.state, 'PENDING')

    # patch movelib.move
    def move_occurrence_data(*args, **kw):
        # try to move test.csv from dataset
        src, dst = (urlsplit(x['url']) for x in args)
        # copy test.csv to dst
        shutil.copyfileobj(
            resource_stream(__name__, 'mock_data/ala_occurrence.csv'),
            open(dst.path, 'w'))

    mock_move.side_effect = move_occurrence_data
    # trigger background process
    transaction.commit()
    # one move should have happened
    self.assertEqual(
        mock_move.call_args[0][0]['url'],
        'http://{0}:{1}/plone/datasets/species/user/test.csv/@@download/file/test.csv'
        .format(self.layer.get('host'), self.layer.get('port')))
    # job state should be completed
    self.assertEqual(jt.state, 'COMPLETED')
    # metadata should be up to date
    self.assertEqual(md['rows'], 29)
def test_import_view_ala_import(self, mock_move=None):
    # TODO: this test needs a running DataMover. (see below)
    testdata = {
        'taxonID': u'urn:lsid:biodiversity.org.au:afd.taxon:0f02f7b5-d9a1-4b2a-924e-ffeaf122e69c',
        'scientificName': u'Pteria penguin',
        'vernacularName': u'penguin wing oyster'
    }
    view = self.getview()
    view.request.form.update({
        'import': 'Import',
        'lsid': testdata['taxonID'],
        'taxon': testdata['scientificName'],
        'common': testdata['vernacularName'],
        'searchOccurrence_source': 'ala',
    })
    # call view:
    view()
    # response should redirect to datasets
    self.assertEqual(view.request.response.getStatus(), 302)
    self.assertEqual(view.request.response.getHeader('Location'),
                     self.portal.datasets.absolute_url())
    # get new dataset and check state?
    ds = self.portal.datasets.species.ala['org-bccvl-content-dataset']
    # check metadata
    md = IBCCVLMetadata(ds)
    self.assertEqual(md['species'], testdata)
    # check job state
    jt = IJobTracker(ds)
    self.assertEqual(jt.state, 'PENDING')

    # prepare mock side effect
    def move_ala_data(*args, **kw):
        src, dst = (urlsplit(x['url']) for x in args)
        if src.scheme == 'ala':
            # first call: fetch ala data
            for name in ('ala_metadata.json', 'ala_dataset.json',
                         'ala_occurrence.zip'):
                open(os.path.join(dst.path, name), 'w').write(
                    Template(
                        resource_string(
                            __name__,
                            'mock_data/{}'.format(name))).safe_substitute(
                                tmpdir=dst.path))
        if dst.scheme == 'scp':
            # 2nd call: upload to plone
            shutil.copyfile(src.path, dst.path)

    mock_move.side_effect = move_ala_data
    # commit transaction to start job
    transaction.commit()
    # verify call
    ala_call = "ala://ala?url=http://biocache.ala.org.au/ws/occurrences/index/download&query=lsid:urn:lsid:biodiversity.org.au:afd.taxon:0f02f7b5-d9a1-4b2a-924e-ffeaf122e69c&filter=zeroCoordinates,badlyFormedBasisOfRecord,detectedOutlier,decimalLatLongCalculationFromEastingNorthingFailed,missingBasisOfRecord,decimalLatLongCalculationFromVerbatimFailed,coordinatesCentreOfCountry,geospatialIssue,coordinatesOutOfRange,speciesOutsideExpertRange,userVerified,processingError,decimalLatLongConverionFailed,coordinatesCentreOfStateProvince,habitatMismatch&email="
    self.assertEqual(mock_move.call_args_list[0][0][0]['url'], ala_call)
    # celery should run in eager mode, so our job state should be up to date
    # as well
    self.assertEqual(jt.state, 'COMPLETED')
    # expand testdata with additional metadata fetched from ala
    testdata.update({
        'clazz': u'BIVALVIA',
        'family': u'PTERIIDAE',
        'genus': u'Pteria',
        'kingdom': u'ANIMALIA',
        'order': u'OSTREIDA',
        'phylum': u'MOLLUSCA',
        'rank': u'species',
    })
    # we should have a bit more metadata, but still the same as before import
    self.assertEqual(md['species'], testdata)
    self.assertEqual(md['genre'], 'DataGenreSpeciesOccurrence')
    self.assertEqual(md['rows'], 29)
    self.assertEqual(
        md['headers'],
        ['species', 'lon', 'lat', 'uncertainty', 'date', 'year', 'month'])
    self.assertEqual(
        md['bounds'], {
            'top': 14.35,
            'right': 177.41,
            'left': 48.218334197998,
            'bottom': -28.911835
        })
    # check that there is a file as well
    self.assertIsNotNone(ds.file)
    self.assertIsNotNone(ds.file.data)
    self.assertGreater(len(ds.file.data), 0)
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know the full
    #       path of the object. We need the path to apply the transmogrifier
    #       chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update);
    # run transmogrify metadata extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(
                ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here.... we don't know which container
            #        we are in... and sshing here is bad as well....
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here..
            #       this really should be a background process
            #       best solution: user uploads to some temporary upload
            #           service (file never ends up here); we have a remote
            #           url here, and tell the datamover to pull it from
            #           there and move it to the final destination (or
            #           something like this)
            #       other good way: let user upload directly to swift (what
            #           about large file uploads?) and take care of clean up
            #           if necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_project_name': swiftopts.get('os_project_name'),
                        'os_storage_url': swiftopts.get('os_storage_url'),
                        'os_user_domain_name': swiftopts.get('os_user_domain_name'),
                        'os_project_domain_name': swiftopts.get('os_project_domain_name'),
                        'auth_version': swiftopts.get('auth_version')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know the full
    #       path of the object. We need the path to apply the transmogrifier
    #       chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    new_object.subject = []
    if self.domain:
        new_object.subject = [self.domain]
    if self.timeperiod:
        new_object.subject += self.timeperiod
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update);
    # run transmogrify metadata extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(new_object, self.request,
                                               childSpecies=True),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'genre': self.datagenre,
                    'dataSource': new_object.dataSource,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: import_multi_species_csv',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(
                ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # FIXME: we can't use ssh here.... we don't know which container
            #        we are in... and sshing here is bad as well....
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # TODO: we push the uploaded file directly to swift here..
            #       this really should be a background process
            #       best solution: user uploads to some temporary upload
            #           service (file never ends up here); we have a remote
            #           url here, and tell the datamover to pull it from
            #           there and move it to the final destination (or
            #           something like this)
            #       other good way: let user upload directly to swift (what
            #           about large file uploads?) and take care of clean up
            #           if necessary
            # 4. move file to swift
            # TODO: do we have enough information to upload to swift?
            #       need a temp url?
            swiftopts = app.conf.get('bccvl', {}).get('swift', {})
            src_url = build_source('file://{}'.format(tmpfile))
            dest_url = build_destination(
                'swift+{}'.format(new_object.remoteUrl),
                settings={
                    'swift': {
                        'os_auth_url': swiftopts.get('os_auth_url'),
                        'os_username': swiftopts.get('os_username'),
                        'os_password': swiftopts.get('os_password'),
                        'os_tenant_name': swiftopts.get('os_tenant_name'),
                        'os_storage_url': swiftopts.get('os_storage_url')
                    }
                })
            try:
                movelib.move(src_url, dest_url)
            except Exception as e:
                # do error handling here
                raise
            finally:
                # clean up temp location
                path = os.path.dirname(tmpfile)
                shutil.rmtree(path)
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        jt.new_job('TODO: generate id',
                   'generate taskname: update_metadata',
                   function=new_object.dataSource,
                   type=new_object.portal_type)
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
def upgrade_220_230_1(context, logger=None):
    if logger is None:
        logger = LOG
    # Run GS steps
    portal = api.portal.get()
    setup = api.portal.get_tool('portal_setup')
    setup.runImportStepFromProfile(PROFILE_ID, 'org.bccvl.site.content')
    setup.runImportStepFromProfile(PROFILE_ID, 'plone.app.registry')
    setup.runImportStepFromProfile(PROFILE_ID, 'actions')
    pc = api.portal.get_tool('portal_catalog')
    # search all experiments and update job objects with infos from the
    # experiment -> delete job info on experiment
    LOG.info('Migrating job data for experiments')
    EXP_TYPES = ['org.bccvl.content.sdmexperiment',
                 'org.bccvl.content.projectionexperiment',
                 'org.bccvl.content.biodiverseexperiment',
                 'org.bccvl.content.ensemble',
                 'org.bccvl.content.speciestraitsexperiment']
    from org.bccvl.site.job.interfaces import IJobTracker
    import json
    for brain in pc.searchResults(portal_type=EXP_TYPES):
        # Update job with process statistics, i.e. rusage
        for result in brain.getObject().values():
            if 'pstats.json' not in result:
                continue
            jt = IJobTracker(result)
            job = None
            try:
                job = jt.get_job()
            except Exception as e:
                LOG.info('Could not resolve %s: %s', result, e)
            if not job:
                continue
            pstats = result['pstats.json']
            if hasattr(pstats, 'file'):
                job.rusage = json.loads(pstats.file.data)
            del result['pstats.json']
    # Setup cookie settings
    sess = portal.acl_users.session
    sess.manage_changeProperties(
        mod_auth_tkt=True,
        secure=True
    )
    # update facet configurations
    from org.bccvl.site.faceted.interfaces import IFacetConfigUtility
    from org.bccvl.site.faceted.tool import import_facet_config
    fct = getUtility(IFacetConfigUtility)
    for cfgobj in fct.types():
        LOG.info("Import facet config for %s", cfgobj.id)
        import_facet_config(cfgobj)
    # set cookie secret from celery configuration
    from org.bccvl.tasks.celery import app
    cookie_cfg = app.conf.get('bccvl', {}).get('cookie', {})
    if cookie_cfg.get('secret', None):
        sess._shared_secret = cookie_cfg.get('secret').encode('utf-8')
        sess = portal.acl_users.session
        sess.manage_changeProperties(
            mod_auth_tkt=True,
            secure=cookie_cfg.get('secure', True)
        )
def submitcc(self):
    # TODO: catch Unauthorized correctly and return json error
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # make sure we have the right context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # experiments location
        context = self.context.restrictedTraverse(
            defaults.EXPERIMENTS_FOLDER_ID)
    else:
        # custom context.... let's use it
        context = self.context
    # parse request body
    params = self.request.form
    # validate input
    # TODO: should validate type as well..... (e.g. string has to be string)
    # TODO: validate dataset and layer ids' existence if possible
    props = {}
    if not params.get('title', None):
        self.record_error('Bad Request', 400, 'Missing parameter title',
                          {'parameter': 'title'})
    else:
        props['title'] = params['title']
    props['description'] = params.get('description', '')
    if not params.get('species_distribution_models', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter species_distribution_models',
                          {'parameter': 'species_distribution_models'})
    else:
        props['species_distribution_models'] = params[
            'species_distribution_models']
    if not params.get('future_climate_datasets', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter future_climate_datasets',
                          {'parameter': 'future_climate_datasets'})
    else:
        props['future_climate_datasets'] = params['future_climate_datasets']
    if params.get('projection_region', ''):
        props['projection_region'] = NamedBlobFile(
            data=json.dumps(params['projection_region']))
    else:
        props['projection_region'] = None
    if self.errors:
        raise BadRequest("Validation Failed")
    # create experiment with data as the form would do
    # TODO: make sure self.context is the 'experiments' folder?
    from plone.dexterity.utils import createContent, addContentToContainer
    experiment = createContent("org.bccvl.content.projectionexperiment",
                               **props)
    experiment = addContentToContainer(context, experiment)
    # FIXME: need to get resolution from somewhere
    IBCCVLMetadata(experiment)['resolution'] = 'Resolution30m'
    # submit newly created experiment
    # TODO: handle background job submit .... at this stage we wouldn't
    #       know the model run job ids
    # TODO: handle submit errors and other errors that may happen above?
    #       generic exceptions could be handled in returnwrapper
    retval = {
        'experiment': {
            'url': experiment.absolute_url(),
            'uuid': IUUID(experiment)
        },
        'jobs': [],
    }
    jt = IExperimentJobTracker(experiment)
    msgtype, msg = jt.start_job(self.request)
    if msgtype is not None:
        retval['message'] = {
            'type': msgtype,
            'message': msg
        }
    for result in experiment.values():
        jt = IJobTracker(result)
        retval['jobs'].append(jt.get_job().id)
    return retval
def update_metadata(self):
    uuid = self.request.form.get('uuid', None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context
        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }
        else:
            raise Exception("Invalid user")
        # build download url
        # 1. get context (site) relative path
        obj_url = obj.absolute_url()
        if obj.portal_type == 'org.bccvl.content.dataset':
            filename = obj.file.filename
            obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
        elif obj.portal_type == 'org.bccvl.content.remotedataset':
            filename = os.path.basename(obj.remoteUrl)
            obj_url = '{}/@@download/{}'.format(obj_url, filename)
        elif obj.portal_type == 'org.bccvl.content.multispeciesdataset':
            filename = obj.file.filename
            obj_url = '{}/@@download/file/{}'.format(obj_url, filename)
        else:
            raise Exception("Wrong content type")
        from org.bccvl.tasks.celery import app
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': obj_url,
                'filename': filename,
                'contenttype': obj.format,
                'context': {
                    'context': '/'.join(obj.getPhysicalPath()),
                    'user': user,
                }
            },
            immutable=True)
        from org.bccvl.tasks.plone import after_commit_task
        after_commit_task(update_task)
        # track background job state
        jt = IJobTracker(obj)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata',
                         function=obj.dataSource,
                         type=obj.portal_type)
        jt.set_progress('PENDING', 'Metadata update pending')
        return job.id
    except Exception as e:
        LOG.error('Caught exception %s', e)
        raise NotFound(self, 'update_metadata', self.request)
def import_trait_data(self):
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    source = self.request.form.get('source', None)
    species = self.request.form.get('species', None)
    traits = self.request.form.get('traits', None)
    environ = self.request.form.get('environ', None)
    dataurl = self.request.form.get('url', None)
    context = None
    if not source or source not in ('aekos', 'zoatrack'):
        raise BadRequest("source parameter must be 'aekos' or 'zoatrack'")
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location
        context = self.context.restrictedTraverse("/".join(
            (defaults.DATASETS_FOLDER_ID,
             defaults.DATASETS_SPECIES_FOLDER_ID, str(source))))
    else:
        # custom context.... let's use it
        context = self.context
    # do user check first
    member = ploneapi.user.get_current()
    if member.getId():
        user = {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        }
    else:
        # We need at least a valid user
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission('org.bccvl.AddDataset', context):
        raise Unauthorized("User not allowed in this context")
    # check parameters
    if not species or not isinstance(species, (basestring, list)):
        raise BadRequest("Missing or invalid species parameter")
    elif isinstance(species, basestring):
        species = [species]
    # for zoatrack, url needs to be set
    if source == 'zoatrack' and not dataurl:
        raise BadRequest("url has to be set")
    # for aekos, at least a trait or environment variable must be specified
    if source == 'aekos' and not traits and not environ:
        raise BadRequest(
            "At least a trait or environment variable has to be set")
    if not traits:
        traits = []
    elif isinstance(traits, basestring):
        traits = [traits]
    if not environ:
        environ = []
    elif isinstance(environ, basestring):
        environ = [environ]
    # all good so far
    # pull dataset from aekos
    title = ' '.join(species)
    # determine dataset type
    portal_type = 'org.bccvl.content.dataset'
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    if swiftsettings.storage_url:
        portal_type = 'org.bccvl.content.remotedataset'
    # create content
    ds = createContent(portal_type, title=title)
    ds.dataSource = source
    ds.description = u' '.join([
        title,
        ','.join(traits),
        ','.join(environ),
        u' imported from {}'.format(source.upper())
    ])
    ds = addContentToContainer(context, ds)
    md = IBCCVLMetadata(ds)
    md['genre'] = 'DataGenreTraits'
    md['categories'] = ['traits']
    md['species'] = [{
        'scientificName': spec,
        'taxonID': spec
    } for spec in species]
    md['traits'] = traits
    md['environ'] = environ
    md['dataurl'] = dataurl
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    #        201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader('Location', nexturl)
    # FIXME: should return a nice json representation of success or error
    return {
        'status': status,
        'message': message,
        'jobid': IJobTracker(ds).get_job().id
    }
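# Illustrative only: how a client might call the trait-import endpoint above
# and follow the 201 Location header. The endpoint path and credentials are
# hypothetical; the response keys ('status', 'message', 'jobid') match the
# return value above.
import requests

def example_import_traits(base_url, auth):
    resp = requests.post(
        '{}/dm/import_trait_data'.format(base_url),  # hypothetical view path
        data={'source': 'zoatrack',
              'species': 'Pteria penguin',
              'url': 'https://zoatrack.example.org/data.csv'},
        auth=auth)
    body = resp.json()
    print(resp.status_code,              # expect 201
          resp.headers.get('Location'),  # datasets folder URL
          body['jobid'])                 # use this id to track the job state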
def submitsdm(self):
    # TODO: catch Unauthorized correctly and return json error
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # make sure we have the right context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # experiments location
        context = self.context.restrictedTraverse(
            defaults.EXPERIMENTS_FOLDER_ID)
    else:
        # custom context.... let's use it
        context = self.context
    # parse request body
    params = self.request.form
    # validate input
    # TODO: should validate type as well..... (e.g. string has to be string)
    # TODO: validate dataset and layer ids' existence if possible
    props = {}
    if not params.get('title', None):
        self.record_error('Bad Request', 400, 'Missing parameter title',
                          {'parameter': 'title'})
    else:
        props['title'] = params['title']
    props['description'] = params.get('description', '')
    if not params.get('occurrence_data', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter occurrence_data',
                          {'parameter': 'occurrence_data'})
    else:
        # FIXME: should properly support source / id;
        #        for now only bccvl source is supported
        props['species_occurrence_dataset'] = params['occurrence_data']['id']
    # FIXME: should properly support source / id;
    #        for now only bccvl source is supported
    props['species_absence_dataset'] = params.get(
        'absence_data', {}).get('id', None)
    props['scale_down'] = params.get('scale_down', False)
    if not params.get('environmental_data', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter environmental_data',
                          {'parameter': 'environmental_data'})
    else:
        props['environmental_datasets'] = params['environmental_data']
    if params.get('modelling_region', ''):
        props['modelling_region'] = NamedBlobFile(
            data=json.dumps(params['modelling_region']))
    else:
        props['modelling_region'] = None
    if not params.get('algorithms', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter algorithms',
                          {'parameter': 'algorithms'})
    else:
        portal = ploneapi.portal.get()
        props['functions'] = {}
        # FIXME: make sure we get the default values from our func object
        for algo, algo_params in params['algorithms'].items():
            if algo_params is None:
                algo_params = {}
            toolkit = portal[defaults.FUNCTIONS_FOLDER_ID][algo]
            toolkit_model = loadString(toolkit.schema)
            toolkit_schema = toolkit_model.schema
            func_props = {}
            for field_name in toolkit_schema.names():
                field = toolkit_schema.get(field_name)
                value = algo_params.get(field_name, field.missing_value)
                if value == field.missing_value:
                    func_props[field_name] = field.default
                else:
                    func_props[field_name] = value
            props['functions'][IUUID(toolkit)] = func_props
    if self.errors:
        raise BadRequest("Validation Failed")
    # create experiment with data as the form would do
    # TODO: make sure self.context is the 'experiments' folder?
    from plone.dexterity.utils import createContent, addContentToContainer
    experiment = createContent("org.bccvl.content.sdmexperiment", **props)
    experiment = addContentToContainer(context, experiment)
    # TODO: check if props and algo params have been applied properly
    experiment.parameters = dict(props['functions'])
    # FIXME: need to get resolution from somewhere
    IBCCVLMetadata(experiment)['resolution'] = 'Resolution30m'
    # submit newly created experiment
    # TODO: handle background job submit .... at this stage we wouldn't
    #       know the model run job ids
    # TODO: handle submit errors and other errors that may happen above?
    #       generic exceptions could be handled in returnwrapper
    retval = {
        'experiment': {
            'url': experiment.absolute_url(),
            'uuid': IUUID(experiment)
        },
        'jobs': [],
    }
    jt = IExperimentJobTracker(experiment)
    msgtype, msg = jt.start_job(self.request)
    if msgtype is not None:
        retval['message'] = {
            'type': msgtype,
            'message': msg
        }
    for result in experiment.values():
        jt = IJobTracker(result)
        retval['jobs'].append(jt.get_job().id)
    return retval
def submittraits(self):
    # TODO: catch Unauthorized correctly and return json error
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # make sure we have the right context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # experiments location
        context = self.context.restrictedTraverse(
            defaults.EXPERIMENTS_FOLDER_ID)
    else:
        # custom context.... let's use it
        context = self.context
    # parse request body
    params = self.request.form
    # validate input
    # TODO: should validate type as well..... (e.g. string has to be string)
    # TODO: validate dataset and layer ids' existence if possible
    props = {}
    if params.get('species_list', None):
        props['species_list'] = params['species_list']
    else:
        self.record_error('Bad Request', 400,
                          'Missing parameter speciesList',
                          {'parameter': 'speciesList'})
    if not params.get('title', None):
        self.record_error('Bad Request', 400, 'Missing parameter title',
                          {'parameter': 'title'})
    else:
        props['title'] = params['title']
    props['description'] = params.get('description', '')
    if not params.get('traits_data', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter traits_data',
                          {'parameter': 'traits_data'})
    else:
        # FIXME: should properly support source / id;
        #        for now only bccvl source is supported
        props['species_traits_dataset'] = params['traits_data']['id']
        props['species_traits_dataset_params'] = {}
        for col_name, col_val in params.get("columns", {}).items():
            if col_val not in ('lat', 'lon', 'species', 'trait_con',
                               'trait_ord', 'trait_nom', 'env_var_con',
                               'env_var_cat', 'random_con', 'random_cat'):
                continue
            props['species_traits_dataset_params'][col_name] = col_val
        if not props['species_traits_dataset_params']:
            self.record_error('Bad Request', 400,
                              'Invalid values for columns',
                              {'parameter': 'columns'})
        # Check for species-level trait data, i.e. species is not specified
        if 'species' not in props['species_traits_dataset_params'].values():
            props['species_list'] = []
    props['scale_down'] = params.get('scale_down', False)
    # env data is optional
    props['environmental_datasets'] = params.get('environmental_data', None)
    if not (props['environmental_datasets']
            or 'env_var_con' not in props['species_traits_dataset_params'].values()
            or 'env_var_cat' not in props['species_traits_dataset_params'].values()):
        self.record_error('Bad Request', 400,
                          'No Environmental data selected',
                          {'parameter': 'environmental_datasets'})
    if params.get('modelling_region', ''):
        props['modelling_region'] = NamedBlobFile(
            data=json.dumps(params['modelling_region']))
    else:
        props['modelling_region'] = None
    if not params.get('algorithms', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter algorithms',
                          {'parameter': 'algorithms'})
    else:
        props['algorithms_species'] = {}
        props['algorithms_diff'] = {}
        funcs_species = getUtility(
            IVocabularyFactory, 'traits_functions_species_source')(context)
        funcs_diff = getUtility(
            IVocabularyFactory, 'traits_functions_diff_source')(context)
        # FIXME: make sure we get the default values from our func object
        for algo_uuid, algo_params in params['algorithms'].items():
            if algo_params is None:
                algo_params = {}
            toolkit = uuidToObject(algo_uuid)
            toolkit_model = loadString(toolkit.schema)
            toolkit_schema = toolkit_model.schema
            func_props = {}
            for field_name in toolkit_schema.names():
                field = toolkit_schema.get(field_name)
                value = algo_params.get(field_name, field.missing_value)
                if value == field.missing_value:
                    func_props[field_name] = field.default
                else:
                    func_props[field_name] = value
            if algo_uuid in funcs_species:
                props['algorithms_species'][algo_uuid] = func_props
            elif algo_uuid in funcs_diff:
                props['algorithms_diff'][algo_uuid] = func_props
            else:
                LOG.warn('Algorithm {} not in allowed list of functions'
                         .format(toolkit.id))
        if not (props['algorithms_species'] or props['algorithms_diff']):
            self.record_error('Bad Request', 400,
                              'Invalid algorithms selected',
                              {'parameter': 'algorithms'})
    if self.errors:
        raise BadRequest("Validation Failed")
    # create experiment with data as the form would do
    # TODO: make sure self.context is the 'experiments' folder?
    from plone.dexterity.utils import createContent, addContentToContainer
    experiment = createContent(
        "org.bccvl.content.speciestraitsexperiment", **props)
    experiment = addContentToContainer(context, experiment)
    experiment.parameters = dict(props['algorithms_species'])
    experiment.parameters.update(dict(props['algorithms_diff']))
    # FIXME: need to get resolution from somewhere
    IBCCVLMetadata(experiment)['resolution'] = 'Resolution30m'
    # submit newly created experiment
    # TODO: handle background job submit .... at this stage we wouldn't
    #       know the model run job ids
    # TODO: handle submit errors and other errors that may happen above?
    #       generic exceptions could be handled in returnwrapper
    retval = {
        'experiment': {
            'url': experiment.absolute_url(),
            'uuid': IUUID(experiment)
        },
        'jobs': [],
    }
    jt = IExperimentJobTracker(experiment)
    msgtype, msg = jt.start_job(self.request)
    if msgtype is not None:
        retval['message'] = {
            'type': msgtype,
            'message': msg
        }
    for result in experiment.values():
        jt = IJobTracker(result)
        retval['jobs'].append(jt.get_job().id)
    return retval
def update_metadata(self):
    uuid = self.request.form.get("uuid", None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context
        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                "id": member.getUserName(),
                "email": member.getProperty("email"),
                "fullname": member.getProperty("fullname"),
            }
        else:
            raise Exception("Invalid user")
        # build download url
        # 1. get context (site) relative path
        obj_url = obj.absolute_url()
        if obj.portal_type == "org.bccvl.content.dataset":
            filename = obj.file.filename
            obj_url = "{}/@@download/file/{}".format(obj_url, filename)
        elif obj.portal_type == "org.bccvl.content.remotedataset":
            filename = os.path.basename(obj.remoteUrl)
            obj_url = "{}/@@download/{}".format(obj_url, filename)
        elif obj.portal_type == "org.bccvl.content.multispeciesdataset":
            filename = obj.file.filename
            obj_url = "{}/@@download/file/{}".format(obj_url, filename)
        else:
            raise Exception("Wrong content type")
        from org.bccvl.tasks.celery import app
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                "url": obj_url,
                "filename": filename,
                "contenttype": obj.format,
                "context": {
                    "context": "/".join(obj.getPhysicalPath()),
                    "user": user,
                },
            },
            immutable=True,
        )
        from org.bccvl.tasks.plone import after_commit_task
        after_commit_task(update_task)
        # track background job state
        jt = IJobTracker(obj)
        job = jt.new_job("TODO: generate id",
                         "generate taskname: update_metadata")
        job.type = obj.portal_type
        jt.set_progress("PENDING", "Metadata update pending")
        return job.id
    except Exception as e:
        LOG.error("Caught exception %s", e)
        raise NotFound(self, "update_metadata", self.request)
def import_ala_data(self):
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    context = None
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location
        context = self.context.restrictedTraverse("/".join(
            (defaults.DATASETS_FOLDER_ID,
             defaults.DATASETS_SPECIES_FOLDER_ID, 'ala')))
    else:
        # custom context.... let's use it
        context = self.context
    # do user check first
    member = ploneapi.user.get_current()
    if member.getId():
        user = {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        }
    else:
        # We need at least a valid user
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission('org.bccvl.AddDataset', context):
        raise Unauthorized("User not allowed in this context")
    params = self.request.form.get('data')
    if params is None:
        self.record_error('Bad Request', 400, 'Missing parameter data',
                          {'parameter': 'data'})
        raise BadRequest('Missing parameter data')
    if not params:
        self.record_error('Bad Request', 400, 'Empty parameter data',
                          {'parameter': 'data'})
        raise BadRequest('Empty parameter data')
    # TODO: should validate objects inside as well? (or use json schema
    #       validation?)
    # all good so far
    # pull dataset from ala
    # TODO: get better name here
    title = params[0].get('name', 'ALA import')
    # determine dataset type
    # 1. test if it is a multi species import
    species = set()
    for query in params:
        biocache_url = '{}/occurrences/search'.format(query['url'])
        search_params = {
            'q': query['query'],
            'pageSize': 0,
            'limit': 2,
            'facets': 'species_guid',
            'fq': 'species_guid:*'  # skip results without species guid
        }
        res = requests.get(biocache_url, params=search_params)
        res = res.json()
        # FIXME: do we need to treat sandbox downloads differently?
        if res.get('facetResults'):
            # do we have some results at all?
            for guid in res['facetResults'][0]['fieldResult']:
                species.add(guid['label'])
    if len(species) > 1:
        portal_type = 'org.bccvl.content.multispeciesdataset'
    else:
        portal_type = 'org.bccvl.content.dataset'
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
        if swiftsettings.storage_url:
            portal_type = 'org.bccvl.content.remotedataset'
    # create content
    ds = createContent(portal_type, title=title)
    ds.dataSource = 'ala'
    ds.description = u' '.join([title, u' imported from ALA'])
    ds.import_params = params
    ds = addContentToContainer(context, ds)
    md = IBCCVLMetadata(ds)
    if IMultiSpeciesDataset.providedBy(ds):
        md['genre'] = 'DataGenreSpeciesCollection'
        md['categories'] = ['multispecies']
    else:
        # species dataset
        md['genre'] = 'DataGenreSpeciesOccurrence'
        md['categories'] = ['occurrence']
    # TODO: populate this correctly as well
    md['species'] = [{'scientificName': 'qid', 'taxonID': 'qid'}]
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    #        201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader('Location', nexturl)
    # FIXME: should return a nice json representation of success or error
    return {
        'status': status,
        'message': message,
        'jobid': IJobTracker(ds).get_job().id
    }
def add(self, object):
    # FIXME: this is a workaround, which is fine for small uploaded files.
    #        large uploads should go through another process anyway
    # TODO: re-implementing this method is the only way to know the full
    #       path of the object. We need the path to apply the transmogrifier
    #       chain.
    # fti = getUtility(IDexterityFTI, name=self.portal_type)
    container = aq_inner(self.context)
    try:
        # traverse to subfolder if possible
        container = container.restrictedTraverse('/'.join(self.subpath))
    except Exception as e:
        LOG.warn('Could not traverse to %s/%s',
                 '/'.join(container.getPhysicalPath()),
                 '/'.join(self.subpath))
    new_object = addContentToContainer(container, object)
    # set data genre:
    if self.datagenre:
        IBCCVLMetadata(new_object)['genre'] = self.datagenre
    if self.categories:
        IBCCVLMetadata(new_object)['categories'] = self.categories
    # rdf commit should happen in a transmogrifier step later on
    # if fti.immediate_view:
    #     self.immediate_view = "%s/%s/%s" % (container.absolute_url(), new_object.id, fti.immediate_view,)
    # else:
    #     self.immediate_view = "%s/%s" % (container.absolute_url(), new_object.id)

    # start background import process (just a metadata update);
    # run transmogrify metadata extraction here
    context_path = '/'.join(new_object.getPhysicalPath())
    member = api.user.get_current()
    # species extract task
    if IMultiSpeciesDataset.providedBy(new_object):
        # kick off csv split import tasks
        import_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.import_multi_species_csv",
            kwargs={
                'url': '{}/@@download/file/{}'.format(
                    new_object.absolute_url(), new_object.file.filename),
                'results_dir': get_results_dir(container, self.request),
                'import_context': {
                    'context': '/'.join(container.getPhysicalPath()),
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                },
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        after_commit_task(import_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: import_multi_species_csv')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Multi species import pending')
    else:
        if hasattr(self, '_upload'):
            file = self._upload['file']
            new_object.format = file.contentType
            uid = IUUID(new_object)
            swiftsettings = getUtility(IRegistry).forInterface(
                ISwiftSettings)
            import os.path
            swift_url = '{storage_url}/{container}/{path}/{name}'.format(
                storage_url=swiftsettings.storage_url,
                container=swiftsettings.result_container,
                path=uid,
                name=os.path.basename(file.filename))
            new_object.remoteUrl = swift_url
        else:
            file = new_object.file
            new_object.format = file.contentType
        dlinfo = IDownloadInfo(new_object)
        # single species upload
        update_task = app.signature(
            "org.bccvl.tasks.datamover.tasks.update_metadata",
            kwargs={
                'url': dlinfo['url'],
                'filename': dlinfo['filename'],
                'contenttype': dlinfo['contenttype'],
                'context': {
                    'context': context_path,
                    'user': {
                        'id': member.getUserName(),
                        'email': member.getProperty('email'),
                        'fullname': member.getProperty('fullname')
                    }
                }
            },
            immutable=True)
        # create upload task in case we upload to external store
        if hasattr(self, '_upload'):
            # There is an upload ... we have to make sure the uploaded data
            # ends up in external storage
            # 3. put temp file aside
            tmpdir = tempfile.mkdtemp(prefix='bccvl_upload')
            tmpfile = os.path.join(tmpdir, os.path.basename(file.filename))
            blobf = file.open()
            try:
                # try rename
                os.rename(blobf.name, tmpfile)
            except OSError:
                # try copy
                shutil.copy(blobf.name, tmpfile)
            # 4. update task chain
            src_url = 'scp://{uid}@{ip}:{port}{file}'.format(
                uid=pwd.getpwuid(os.getuid()).pw_name,
                ip=get_hostname(self.request),
                port=os.environ.get('SSH_PORT', 22),
                file=tmpfile)
            dest_url = 'swift+{}'.format(new_object.remoteUrl)
            move_task = app.signature(
                'org.bccvl.tasks.datamover.tasks.move',
                kwargs={
                    'move_args': [(src_url, dest_url)],
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            cleanup_task = app.signature(
                'org.bccvl.tasks.plone.import_cleanup',
                kwargs={
                    'path': os.path.dirname(tmpfile),
                    'context': {
                        'context': context_path,
                        'user': {
                            'id': member.getUserName(),
                            'email': member.getProperty('email'),
                            'fullname': member.getProperty('fullname')
                        }
                    }
                },
                immutable=True)
            update_task = move_task | update_task | cleanup_task
            # need some more workflow states here to support e.g. zip file
            # upload (multiple rasters), give the user a chance to better
            # define metadata, and make sure update_metadata does not change
            # user edited metadata -> layer, unit, projection, whatever
            # FIXME: clean up tmp upload directory as well
        # queue job submission
        after_commit_task(update_task)
        # create job tracking object
        jt = IJobTracker(new_object)
        job = jt.new_job('TODO: generate id',
                         'generate taskname: update_metadata')
        job.type = new_object.portal_type
        jt.set_progress('PENDING', u'Metadata update pending')
    # We have to reindex after updating the object
    new_object.reindexObject()
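# Illustrative only: the `move_task | update_task | cleanup_task` expression
# above is a standard Celery chain of immutable signatures. A minimal,
# self-contained sketch of the same pattern (the app setup and task bodies
# below are hypothetical, not the org.bccvl.tasks implementations):
from celery import Celery

sketch_app = Celery('sketch')

@sketch_app.task
def move(move_args, context):
    pass  # e.g. copy src -> dest

@sketch_app.task
def update_metadata(url, filename, contenttype, context):
    pass  # e.g. extract metadata from the moved file

@sketch_app.task
def cleanup(path, context):
    pass  # e.g. remove the temp upload directory

# .si(...) creates an immutable signature, so each task receives only its own
# kwargs and ignores the previous task's return value -- the same effect as
# app.signature(..., immutable=True) in the code above.
chain = (move.si(move_args=[('src', 'dest')], context={})
         | update_metadata.si(url='http://example.com/f.csv',
                              filename='f.csv', contenttype='text/csv',
                              context={})
         | cleanup.si(path='/tmp/bccvl_upload', context={}))
# chain.delay()  # would queue the three tasks to run in order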