def biodiverse_listing_details(expbrain):
    """Collect display details for a Biodiverse experiment listing.

    Walks all projection dataset uuids referenced by the experiment and
    aggregates species names, start years, emission scenarios and GCMs
    into comma-separated strings.

    :param expbrain: catalog brain of the Biodiverse experiment
    :returns: dict with keys 'type', 'functions', 'species_occurrence',
              'species_absence' and 'years'
    """
    details = {}
    exp = expbrain.getObject()
    species = set()
    years = set()
    emscs = set()
    gcms = set()
    # exp.projection maps experiment uuid -> mapping keyed by dataset uuid;
    # flatten to iterate over every referenced dataset uuid
    for dsuuid in chain.from_iterable(map(lambda x: x.keys(), exp.projection.itervalues())):
        dsobj = uuidToObject(dsuuid)
        # TODO: should inform user about missing dataset
        if dsobj:
            md = IBCCVLMetadata(dsobj)
            species.add(md.get("species", {}).get("scientificName", ""))
            period = md.get("temporal")
            if period:
                # only the start year of the temporal period is shown
                years.add(Period(period).start)
            gcm = md.get("gcm")
            if gcm:
                gcms.add(gcm)
            emsc = md.get("emsc")
            if emsc:
                emscs.add(emsc)
    details.update(
        {
            "type": "BIODIVERSE",
            "functions": "endemism, redundancy",
            "species_occurrence": ", ".join(sorted(species)),
            # NOTE(review): 'species_absence' actually carries emsc/gcm info —
            # looks like a reused template key; confirm against the consumer
            "species_absence": "{}, {}".format(", ".join(sorted(emscs)),
                                               ", ".join(sorted(gcms))),
            "years": ", ".join(sorted(years)),
        }
    )
    return details
def rat(self):
    """Return the Raster Attribute Table (RAT) for a dataset layer.

    Request parameters:
        uuid  -- uuid of the dataset
        layer -- layer identifier within the dataset's layer metadata

    :raises NotFound: if the dataset can't be resolved or the RAT can't
                      be decoded
    :raises BadRequest: if 'layer' is missing or not a known layer
    """
    uuid = self.request.form.get('uuid')
    layer = self.request.form.get('layer')
    brain = None
    try:
        brain = uuidToCatalogBrain(uuid)
    except Exception as e:
        LOG.error('Caught exception %s', e)
    if not brain:
        self.record_error('Not Found', 404, 'dataset not found',
                          {'parameter': 'uuid'})
        raise NotFound(self, 'metadata', self.request)
    md = IBCCVLMetadata(brain.getObject())
    # FIX: was `not layer and layer not in ...`, which only rejected an
    # *empty* layer parameter and let a present-but-unknown layer fall
    # through to the lookup below; missing OR unknown must both be 400.
    if not layer or layer not in md.get('layers', {}):
        self.record_error('Bad Request', 400, 'Missing parameter layer',
                          {'parameter': 'layer'})
        raise BadRequest('Missing parameter layer')
    try:
        rat = md.get('layers', {}).get(layer, {}).get('rat')
        # rat is stored as a (unicode) JSON string in the metadata
        rat = json.loads(unicode(rat))
        return rat
    except Exception as e:
        LOG.warning(
            "Couldn't decode Raster Attribute Table from metadata. %s: %s",
            self.context, repr(e))
        raise NotFound(self, 'rat', self.request)
def DatasetSearchableText(obj, **kw):
    """Indexer: build the SearchableText string for a dataset.

    Combines id, title, description, layer titles, species names and —
    for future-climate datasets — emission scenario, GCM, year and month.

    :param obj: the dataset content object
    :returns: single unicode string of space-joined tokens
    """
    md = IBCCVLMetadata(obj)
    entries = [safe_unicode(obj.id), safe_unicode(obj.title) or u"", safe_unicode(obj.description) or u""]
    if "layers" in md:
        layer_vocab = getUtility(IVocabularyFactory, "layer_source")(obj)
        for key in md["layers"]:
            # skip layers unknown to the vocabulary
            if key not in layer_vocab:
                continue
            entries.append(safe_unicode(layer_vocab.getTerm(key).title) or u"")
    if "species" in md:
        entries.extend(
            (
                safe_unicode(md.get("species", {}).get("scientificName")) or u"",
                safe_unicode(md.get("species", {}).get("vernacularName")) or u"",
            )
        )
    if md.get("genre") == "DataGenreFC":
        # year, gcm, emsc
        emsc_vocab = getUtility(IVocabularyFactory, "emsc_source")(obj)
        gcm_vocab = getUtility(IVocabularyFactory, "gcm_source")(obj)
        year = unicode(md.get("year", u""))
        month = unicode(md.get("month", u""))
        # FIX: use .get() so a dataset missing 'emsc'/'gcm' metadata does
        # not raise KeyError (matches the sibling SearchableText indexer)
        if md.get("emsc") in emsc_vocab:
            entries.append(safe_unicode(emsc_vocab.getTerm(md["emsc"]).title) or u"")
        if md.get("gcm") in gcm_vocab:
            entries.append(safe_unicode(gcm_vocab.getTerm(md["gcm"]).title) or u"")
        entries.append(year)
        entries.append(month)
    elif md.get("genre") == "DataGenreCC":
        entries.append(u"current")
    return u" ".join(entries)
def subitems(self, dsbrain):
    # return a generator of selectable items within dataset
    """Yield selectable sub-items (layers and dataset parts) for a dataset.

    :param dsbrain: catalog brain of the dataset
    :yields: dicts with 'id', 'title' and 'selected' keys
    """
    md = IBCCVLMetadata(dsbrain.getObject())
    layer_vocab = self.dstools.layer_vocab
    # previously selected sub-item ids for this dataset (may be None)
    selectedsubitems = self.value.get(dsbrain.UID) or ()
    # species collections don't expose their layers as selectable items
    if md.get('genre') != 'DataGenreSpeciesCollection':
        for layer in sorted(md.get('layers', ())):
            subitem = {
                'id': layer,
                # fall back to the raw layer id when the vocabulary
                # doesn't know the layer
                'title': layer_vocab.getTerm(layer).title if layer in layer_vocab else layer,
                'selected': layer in selectedsubitems,
            }
            yield subitem
    # datasets may have sub-dataset 'parts' referenced by uuid
    for subdsid in sorted(getattr(dsbrain.getObject(), 'parts', ())):
        part = uuidToCatalogBrain(subdsid)
        # TODO: should we just ignore it?
        if not part:
            continue
        subitem = {
            'id': subdsid,
            'title': part.Title,
            'selected': subdsid in selectedsubitems
        }
        yield subitem
def test_filemetadata(self):
    """CSV file metadata is extracted for the ABT occurrence dataset."""
    dataset = self.get_dataset(
        defaults.DATASETS_SPECIES_FOLDER_ID, 'ABT', 'occurrence.csv')
    metadata = IBCCVLMetadata(dataset)
    self.assertEqual(metadata.get('rows'), 3)
    expected_bounds = {'bottom': 1, 'left': 1, 'top': 3, 'right': 3}
    self.assertEqual(metadata.get('bounds'), expected_bounds)
    self.assertEqual(metadata.get('headers'), ['Name', 'lon', 'lat'])
    # check if species attribute exists
    self.assertIn('species', metadata)
def test_filemetadata(self):
    """CSV file metadata is extracted for the ABT occurrence dataset."""
    dataset = self.get_dataset(
        defaults.DATASETS_SPECIES_FOLDER_ID, 'ABT', 'occurrence.csv')
    # local import kept as in the original test module
    from org.bccvl.site.interfaces import IBCCVLMetadata
    metadata = IBCCVLMetadata(dataset)
    self.assertEqual(metadata.get('rows'), 3)
    expected_bounds = {'bottom': 1, 'left': 1, 'top': 3, 'right': 3}
    self.assertEqual(metadata.get('bounds'), expected_bounds)
    self.assertEqual(metadata.get('headers'), ['Name', 'lon', 'lat'])
    # check if species attribute exists
    self.assertIn('species', metadata)
def biodiverse_listing_details(expbrain):
    """Collect display details for a Biodiverse experiment listing.

    Variant that also aggregates months and the titles of the input
    experiments referenced by the projection mapping.

    :param expbrain: catalog brain of the Biodiverse experiment
    :returns: dict with type/functions/species/years/months and
              'input_experiments' (a set of experiment titles)
    """
    details = {}
    exp = expbrain.getObject()
    species = set()
    years = set()
    months = set()
    emscs = set()
    gcms = set()
    inputexps = set()
    # exp.projection maps input-experiment uuid -> iterable of dataset uuids
    for expuuid, val in exp.projection.iteritems():
        inputexps.add(get_title_from_uuid(expuuid, u'(Unavailable)'))
        for dsuuid in val:
            dsobj = uuidToObject(dsuuid)
            # TODO: should inform user about missing dataset
            if dsobj:
                md = IBCCVLMetadata(dsobj)
                species.add(
                    md.get('species', {}).get('scientificName', u'(Unavailable)'))
                year = md.get('year')
                if year:
                    years.add(year)
                month = md.get('month')
                if month:
                    months.add(month)
                gcm = md.get('gcm')
                if gcm:
                    gcms.add(gcm)
                emsc = md.get('emsc')
                if emsc:
                    emscs.add(emsc)
    details.update({
        'type': 'BIODIVERSE',
        'functions': 'endemism, redundancy',
        'species_occurrence': ', '.join(sorted(species)),
        # NOTE(review): 'species_absence' carries emsc/gcm info — looks
        # like a reused template key; confirm against the consumer
        'species_absence': '{}, {}'.format(', '.join(sorted(emscs)),
                                           ', '.join(sorted(gcms))),
        'years': ', '.join(sorted(years)),
        'months': ', '.join(sorted(months)),
        'input_experiments': inputexps
    })
    return details
def dataset_environmental_layer(obj, **kw):
    """Indexer: layers for a dataset.

    Prefers the experiment-recorded 'layers_used' metadata; falls back to
    the list of layers the dataset itself provides (or None).
    """
    metadata = IBCCVLMetadata(obj)
    # if we have 'layers_used' index it
    has_layers_used = 'layers_used' in metadata
    # otherwise index list of layers provided by dataset
    return metadata['layers_used'] if has_layers_used else metadata.get('layers', None)
def getdatasetparams(uuid):
    # return dict with:
    #    filename
    #    downloadurl
    #    dm_accessurl -> maybe add url rewrite to datamover?
    #    occurrence specific:
    #        species
    #    raster specific:
    #        layers ... need to split this up
    """Build a parameter dict describing the dataset identified by uuid.

    :param uuid: dataset uuid
    :returns: dict of dataset info (filename, urls, species, layers...)
              or None when the uuid can't be resolved
    """
    dsobj = uuidToObject(uuid)
    if dsobj is None:
        return None
    dsinfo = getDatasetInfo(dsobj, uuid)
    # if we have species info add it
    dsmdr = IBCCVLMetadata(dsobj)
    species = dsmdr.get('species', {}).get('scientificName')
    if species:
        dsinfo['species'] = species
    # if we can get layermetadata, let's add it
    biomod = getdsmetadata(dsobj)
    layers = biomod.get('layers', [])
    if len(layers) > 0:
        for lk, lv in biomod['layers'].items():
            if lv is not None:
                # per-layer filename defaults to the dataset's filename
                dsinfo.setdefault('layers', {})[lk] = {
                    'filename': lv.get('filename', biomod['filename']),
                    'datatype': lv.get('datatype', None)
                }
    # return infoset
    return dsinfo
def items(self):
    # return dict with keys for experiment
    # and subkey 'models' for models within experiment
    """Yield one dict per selected experiment, each listing the model
    datasets (of genre self.genre) found inside that experiment.
    """
    if self.value:
        for experiment_uuid, model_uuids in self.value.items():
            item = {}
            expbrain = uuidToCatalogBrain(experiment_uuid)
            item['title'] = expbrain.Title
            item['uuid'] = expbrain.UID
            # TODO: what else wolud I need from an experiment?
            exp = expbrain.getObject()
            expmd = IBCCVLMetadata(exp)
            item['resolution'] = expmd.get('resolution')
            item['brain'] = expbrain
            # now search all models within and add infos
            pc = getToolByName(self.context, 'portal_catalog')
            brains = pc.searchResults(path=expbrain.getPath(),
                                      BCCDataGenre=self.genre)
            # TODO: maybe as generator?
            item['datasets'] = [{
                'uuid': brain.UID,
                'title': brain.Title,
                'obj': brain.getObject(),
                'md': IBCCVLMetadata(brain.getObject()),
                # preserve previously selected state
                'selected': brain.UID in self.value[experiment_uuid]
            } for brain in brains]
            yield item
def details(self, context=None):
    # fetch details about dataset, if attributes are unpopulated
    # get data from associated collection
    """Return descriptive details for a dataset, falling back to its
    containing collection for unpopulated attributes.

    :param context: dataset object; defaults to self.context
    :returns: dict with title, description, attribution, rights,
              external_description and (if any) 'layers' vocabulary terms
    """
    if context is None:
        context = self.context
    coll = context
    # walk up until we find the containing collection (or hit site root)
    while not (ISiteRoot.providedBy(coll) or ICollection.providedBy(coll)):
        coll = coll.__parent__
    # we have either hit siteroot or found a collection
    ret = {
        'title': context.title,
        'description': context.description or coll.description,
        'attribution': context.attribution or getattr(coll, 'attribution'),
        'rights': context.rights or coll.rights,
        'external_description': context.external_description or getattr(coll, 'external_description'),
    }
    md = IBCCVLMetadata(context)
    if 'layers' in md:
        layers = []
        for layer in sorted(md.get('layers', ())):
            try:
                layers.append(self.layer_vocab.getTerm(layer))
            except LookupError:
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit; vocabulary getTerm raises
                # LookupError for unknown values, so catch only that and
                # substitute a simple term for the unknown layer id
                layers.append(SimpleTerm(layer, layer, layer))
        if layers:
            ret['layers'] = layers
    return ret
def items(self):
    # return dict with keys for experiment
    # and subkey 'models' for models within experiment
    """Yield one dict per selected experiment, each listing the model
    datasets (of genre self.genre) found inside that experiment.
    """
    if self.value:
        for experiment_uuid, model_uuids in self.value.items():
            item = {}
            expbrain = uuidToCatalogBrain(experiment_uuid)
            item['title'] = expbrain.Title
            item['uuid'] = expbrain.UID
            # TODO: what else wolud I need from an experiment?
            exp = expbrain.getObject()
            expmd = IBCCVLMetadata(exp)
            item['resolution'] = expmd.get('resolution')
            item['brain'] = expbrain
            # now search all models within and add infos
            pc = getToolByName(self.context, 'portal_catalog')
            brains = pc.searchResults(path=expbrain.getPath(),
                                      BCCDataGenre=self.genre)
            # TODO: maybe as generator?
            item['datasets'] = [{'uuid': brain.UID,
                                 'title': brain.Title,
                                 'obj': brain.getObject(),
                                 'md': IBCCVLMetadata(brain.getObject()),
                                 # preserve previously selected state
                                 'selected': brain.UID in self.value[experiment_uuid]}
                                for brain in brains]
            yield item
def getdatasetparams(uuid):
    # return dict with:
    #    filename
    #    downloadurl
    #    dm_accessurl -> maybe add url rewrite to datamover?
    #    occurrence specific:
    #        species
    #    raster specific:
    #        layers ... need to split this up
    """Build a parameter dict describing the dataset identified by uuid.

    :param uuid: dataset uuid
    :returns: dict of dataset info (filename, urls, species, layers...)
              or None when the uuid can't be resolved
    """
    dsobj = uuidToObject(uuid)
    if dsobj is None:
        return None
    dsinfo = getDatasetInfo(dsobj, uuid)
    # if we have species info add it
    dsmdr = IBCCVLMetadata(dsobj)
    species = dsmdr.get('species', {}).get('scientificName')
    if species:
        dsinfo['species'] = species
    # if we can get layermetadata, let's add it
    biomod = getdsmetadata(dsobj)
    layers = biomod.get('layers', [])
    if len(layers) > 0:
        for lk, lv in biomod['layers'].items():
            if lv is not None:
                # per-layer filename defaults to the dataset's filename
                dsinfo.setdefault('layers', {})[lk] = {
                    'filename': lv.get('filename', biomod['filename']),
                    'datatype': lv.get('datatype', None)}
    # return infoset
    return dsinfo
def year(obj, **kw):
    # FIXME: this indexer is meant for future projection only ....
    #        - make sure we don't index any other datasets. i.e. environmental
    #          and current datasets, which may have a date attached to it, but
    #          it is meaningless for future projections
    """Indexer: return the dataset's 'year' metadata as a string, or the
    raw (falsy) value when no year is set."""
    value = IBCCVLMetadata(obj).get('year', None)
    return str(value) if value else value
def scientific_category(obj, **kw):
    """Indexer: collect the full vocabulary term paths for all scientific
    categories attached to the dataset.

    :returns: tuple of term path elements, or None when no category matches
    """
    metadata = IBCCVLMetadata(obj)
    vocab = getUtility(IVocabularyFactory, 'scientific_category_source')(obj)
    terms = set()
    for category in metadata.get('categories', ()):
        # expand each category to its full path within the vocabulary tree
        terms.update(vocab.getTermPath(category))
    return tuple(terms) if terms else None
def items(self):
    # return dict with keys for experiment
    # and subkey 'models' for models within experiment
    """Yield one dict per selected experiment with its completed result
    datasets, each annotated with threshold selection info.
    """
    if self.value:
        for experiment_uuid, model_uuids in self.value.items():
            item = {}
            expbrain = uuidToCatalogBrain(experiment_uuid)
            # TODO: we have an experiment_uuid, but can't access the
            #       experiment (deleted?, access denied?)
            #       shall we at least try to get some details?
            if expbrain is None:
                continue
            item['title'] = expbrain.Title
            item['uuid'] = expbrain.UID
            item['brain'] = expbrain
            # TODO: what else wolud I need from an experiment?
            exp = expbrain.getObject()
            expmd = IBCCVLMetadata(exp)
            item['resolution'] = expmd.get('resolution')
            # now search all datasets within and add infos
            pc = getToolByName(self.context, 'portal_catalog')
            # only look inside completed result folders
            results = pc.searchResults(path=expbrain.getPath(),
                                       portal_type='Folder',
                                       job_state='COMPLETED')
            brains = pc.searchResults(path=[r.getPath() for r in results],
                                      BCCDataGenre=self.genre)
            # TODO: maybe as generator?
            item['subitems'] = []
            for brain in brains:
                # FIXME: I need a different list of thresholds for display;
                #        esp. don't look up threshold, but take vales (threshold
                #        id and value) from field as is
                thresholds = dataset.getThresholds(brain.UID)[brain.UID]
                threshold = self.value[experiment_uuid].get(brain.UID)
                # is threshold in list?
                if threshold and threshold['label'] not in thresholds:
                    # maybe a custom entered number?
                    # ... I guess we don't really care as long as we produce
                    #     the same the user entered. (validate?)
                    thresholds[threshold['label']] = threshold['label']
                dsobj = brain.getObject()
                dsmd = IBCCVLMetadata(dsobj)
                item['subitems'].append({
                    'uuid': brain.UID,
                    'title': brain.Title,
                    'selected': brain.UID in self.value[experiment_uuid],
                    'threshold': threshold,
                    'thresholds': thresholds,
                    'brain': brain,
                    'md': dsmd,
                    'obj': dsobj,
                    # TODO: this correct? only one layer ever?
                    'layermd': dsmd['layers'].values()[0]
                })
            yield item
def items(self):
    # return dict with keys for experiment
    # and subkey 'models' for models within experiment
    """Yield one dict per selected experiment with its result datasets,
    each annotated with threshold selection info.

    Skips experiments whose catalog brain can no longer be resolved
    (deleted, or access denied).
    """
    if self.value:
        for experiment_uuid, model_uuids in self.value.items():
            item = {}
            expbrain = uuidToCatalogBrain(experiment_uuid)
            # FIX: guard against an unresolvable experiment uuid (deleted?
            # access denied?) — the sibling implementation already does
            # this; without it expbrain.Title raises AttributeError.
            if expbrain is None:
                continue
            item['title'] = expbrain.Title
            item['uuid'] = expbrain.UID
            item['brain'] = expbrain
            # TODO: what else wolud I need from an experiment?
            exp = expbrain.getObject()
            expmd = IBCCVLMetadata(exp)
            item['resolution'] = expmd.get('resolution')
            # now search all datasets within and add infos
            pc = getToolByName(self.context, 'portal_catalog')
            brains = pc.searchResults(path=expbrain.getPath(),
                                      BCCDataGenre=self.genre)
            # TODO: maybe as generator?
            item['datasets'] = []
            for brain in brains:
                # FIXME: I need a different list of thresholds for display;
                #        esp. don't look up threshold, but take vales
                #        (threshold id and value) from field as is
                thresholds = dataset.getThresholds(brain.UID)[brain.UID]
                threshold = self.value[experiment_uuid].get(brain.UID)
                # is threshold in list?
                if threshold and threshold['label'] not in thresholds:
                    # maybe a custom entered number?
                    # ... I guess we don't really care as long as we produce
                    #     the same the user entered. (validate?)
                    thresholds[threshold['label']] = threshold['label']
                dsobj = brain.getObject()
                dsmd = IBCCVLMetadata(dsobj)
                item['datasets'].append({
                    'uuid': brain.UID,
                    'title': brain.Title,
                    'selected': brain.UID in self.value[experiment_uuid],
                    'threshold': threshold,
                    'thresholds': thresholds,
                    'brain': brain,
                    'md': dsmd,
                    'obj': dsobj,
                    # TODO: this correct? only one layer ever?
                    'layermd': dsmd['layers'].values()[0]
                })
            yield item
def getGenreSchemata(self):
    """Return the list of schema interfaces that apply to the context
    dataset, based on its metadata genre and provided marker interfaces."""
    ctx = self.context
    genre = IBCCVLMetadata(ctx).get('genre')
    result = []
    # genre-specific schema first, if the genre is known
    if genre in self.genre_interface_map:
        result.append(self.genre_interface_map[genre])
    # then storage-type schemata in a fixed order
    for marker in (IBlobDataset, IRemoteDataset):
        if marker.providedBy(ctx):
            result.append(marker)
    return result
def subitems(self, dsbrain):
    # return a generator of selectable items within dataset
    """Yield selectable layer items for a dataset.

    :param dsbrain: catalog brain of the dataset
    :yields: dicts with 'id', 'title' and 'selected' keys
    """
    md = IBCCVLMetadata(dsbrain.getObject())
    layer_vocab = self.dstools.layer_vocab
    selectedlayers = self.value.get(dsbrain.UID) or ()
    for layer in sorted(md.get('layers', ())):
        subitem = {
            'id': layer,
            # FIX: guard the vocabulary lookup — getTerm raises
            # LookupError for layers unknown to the vocabulary; fall back
            # to the raw layer id (consistent with the sibling subitems
            # implementation)
            'title': layer_vocab.getTerm(layer).title if layer in layer_vocab else layer,
            'selected': layer in selectedlayers,
        }
        yield subitem
def DatasetSearchableText(obj, **kw):
    """Indexer: build the SearchableText string for a dataset.

    Variant that decides future/current handling from obj.subject tags
    instead of the metadata genre.
    """
    md = IBCCVLMetadata(obj)
    entries = [
        safe_unicode(obj.id),
        safe_unicode(obj.title) or u"",
        safe_unicode(obj.description) or u""
    ]
    if 'layers' in md:
        layer_vocab = getUtility(IVocabularyFactory, 'layer_source')(obj)
        for key in md['layers']:
            # skip layers unknown to the vocabulary
            if key not in layer_vocab:
                continue
            entries.append(
                safe_unicode(layer_vocab.getTerm(key).title) or u""
            )
    if 'species' in md:
        entries.extend((
            safe_unicode(md.get('species', {}).get('scientificName')) or u"",
            safe_unicode(md.get('species', {}).get('vernacularName')) or u"",
        ))
    if "Future datasets" in obj.subject:
        # year, gcm, emsc
        emsc_vocab = getUtility(IVocabularyFactory, 'emsc_source')(obj)
        gcm_vocab = getUtility(IVocabularyFactory, 'gcm_source')(obj)
        year = unicode(md.get('year', u''))
        month = unicode(md.get('month', u''))
        if md.get('emsc') in emsc_vocab:
            entries.append(
                safe_unicode(emsc_vocab.getTerm(md['emsc']).title) or u""
            )
        if md.get('gcm') in gcm_vocab:
            entries.append(
                safe_unicode(gcm_vocab.getTerm(md['gcm']).title) or u""
            )
        entries.append(year)
        entries.append(month)
    elif "Current datasets" in obj.subject:
        entries.append(u"current")
    return u" ".join(entries)
def rat(self):
    """Return the Raster Attribute Table (RAT) for a dataset layer.

    Request parameters:
        uuid  -- uuid of the dataset
        layer -- layer identifier within the dataset's layer metadata

    :raises NotFound: if the dataset can't be resolved or the RAT can't
                      be decoded
    :raises BadRequest: if 'layer' is missing or not a known layer
    """
    uuid = self.request.form.get("uuid")
    layer = self.request.form.get("layer")
    brain = None
    try:
        brain = uuidToCatalogBrain(uuid)
    except Exception as e:
        LOG.error("Caught exception %s", e)
    if not brain:
        self.record_error("Not Found", 404, "dataset not found",
                          {"parameter": "uuid"})
        raise NotFound(self, "metadata", self.request)
    md = IBCCVLMetadata(brain.getObject())
    # FIX: was `not layer and layer not in ...`, which only rejected an
    # *empty* layer parameter and let a present-but-unknown layer fall
    # through to the lookup below; missing OR unknown must both be 400.
    if not layer or layer not in md.get("layers", {}):
        self.record_error("Bad Request", 400, "Missing parameter layer",
                          {"parameter": "layer"})
        raise BadRequest("Missing parameter layer")
    try:
        rat = md.get("layers", {}).get(layer, {}).get("rat")
        # rat is stored as a (unicode) JSON string in the metadata
        rat = json.loads(unicode(rat))
        return rat
    except Exception as e:
        LOG.warning("Couldn't decode Raster Attribute Table from metadata. %s: %s",
                    self.context, repr(e))
        raise NotFound(self, "rat", self.request)
def DatasetSearchableText(obj, **kw):
    """Indexer: build the SearchableText string for a dataset.

    Variant that derives the year from the 'period' metadata via Period.
    """
    md = IBCCVLMetadata(obj)
    entries = [
        safe_unicode(obj.id),
        safe_unicode(obj.title) or u"",
        safe_unicode(obj.description) or u""
    ]
    if 'layers' in md:
        layer_vocab = getUtility(IVocabularyFactory, 'layer_source')(obj)
        for key in md['layers']:
            # skip layers unknown to the vocabulary
            if key not in layer_vocab:
                continue
            entries.append(
                safe_unicode(layer_vocab.getTerm(key).title) or u""
            )
    if 'species' in md:
        entries.extend((
            safe_unicode(md.get('species', {}).get('scientificName')) or u"",
            safe_unicode(md.get('species', {}).get('vernacularName')) or u"",
        ))
    if md.get('genre') == "DataGenreFC":
        # year, gcm, emsc
        emsc_vocab = getUtility(IVocabularyFactory, 'emsc_source')(obj)
        gcm_vocab = getUtility(IVocabularyFactory, 'gcm_source')(obj)
        year = Period(md.get('period', '')).start
        # FIX: use .get() so a dataset missing 'emsc'/'gcm' metadata does
        # not raise KeyError (matches the sibling SearchableText indexer)
        if md.get('emsc') in emsc_vocab:
            entries.append(
                safe_unicode(emsc_vocab.getTerm(md['emsc']).title) or u""
            )
        if md.get('gcm') in gcm_vocab:
            entries.append(
                safe_unicode(gcm_vocab.getTerm(md['gcm']).title) or u""
            )
        entries.append(safe_unicode(year) or u"")
    elif md.get('genre') == "DataGenreCC":
        entries.append(u"current")
    return u" ".join(entries)
def biodiverse_listing_details(expbrain):
    """Collect display details for a Biodiverse experiment listing.

    Variant aggregating species, years, months, emission scenarios and
    GCMs from all projection datasets referenced by the experiment.
    """
    details = {}
    exp = expbrain.getObject()
    species = set()
    years = set()
    months = set()
    emscs = set()
    gcms = set()
    # flatten projection mapping values (dataset-uuid keyed dicts)
    for dsuuid in chain.from_iterable(map(lambda x: x.keys(),
                                          exp.projection.itervalues())):
        dsobj = uuidToObject(dsuuid)
        # TODO: should inform user about missing dataset
        if dsobj:
            md = IBCCVLMetadata(dsobj)
            species.add(md.get('species', {}).get('scientificName',
                                                  u'(Unavailable)'))
            year = md.get('year')
            if year:
                years.add(year)
            month = md.get('month')
            if month:
                months.add(month)
            gcm = md.get('gcm')
            if gcm:
                gcms.add(gcm)
            emsc = md.get('emsc')
            if emsc:
                emscs.add(emsc)
    details.update({
        'type': 'BIODIVERSE',
        'functions': 'endemism, redundancy',
        'species_occurrence': ', '.join(sorted(species)),
        # NOTE(review): 'species_absence' carries emsc/gcm info — looks
        # like a reused template key; confirm against the consumer
        'species_absence': '{}, {}'.format(', '.join(sorted(emscs)),
                                           ', '.join(sorted(gcms))),
        'years': ', '.join(sorted(years)),
        'months': ', '.join(sorted(months))
    })
    return details
def biodiverse_listing_details(expbrain):
    """Collect display details for a Biodiverse experiment listing.

    Variant deriving years from the 'temporal' metadata via Period.
    """
    details = {}
    exp = expbrain.getObject()
    species = set()
    years = set()
    emscs = set()
    gcms = set()
    # flatten projection mapping values (dataset-uuid keyed dicts)
    for dsuuid in chain.from_iterable(
            map(lambda x: x.keys(), exp.projection.itervalues())):
        dsobj = uuidToObject(dsuuid)
        # TODO: should inform user about missing dataset
        if dsobj:
            md = IBCCVLMetadata(dsobj)
            species.add(md.get('species', {}).get('scientificName', ''))
            period = md.get('temporal')
            if period:
                # only the start year of the temporal period is shown
                years.add(Period(period).start)
            gcm = md.get('gcm')
            if gcm:
                gcms.add(gcm)
            emsc = md.get('emsc')
            if emsc:
                emscs.add(emsc)
    details.update({
        'type': 'BIODIVERSE',
        'functions': 'endemism, redundancy',
        'species_occurrence': ', '.join(sorted(species)),
        # NOTE(review): 'species_absence' carries emsc/gcm info — looks
        # like a reused template key; confirm against the consumer
        'species_absence': '{}, {}'.format(', '.join(sorted(emscs)),
                                           ', '.join(sorted(gcms))),
        'years': ', '.join(sorted(years))
    })
    return details
def get_project_params(result):
    """Assemble the worker parameter dict for a projection job.

    Expands dataset uuids in result.job_params into full dataset info
    dicts, mangles the species name for biomod, flattens future climate
    datasets into a per-layer list, and converts Decimals to floats so
    the dict survives JSON encoding through celery.

    :param result: result object carrying job_params
    :returns: dict with 'env', 'params' and 'worker' keys
    """
    params = deepcopy(result.job_params)
    # get metadata for species_distribution_models
    uuid = params['species_distribution_models']
    params['species_distribution_models'] = getdatasetparams(uuid)
    # do biomod name mangling of species name
    params['species_distribution_models']['species'] = re.sub(
        u"[ _'\"/\(\)\{\}\[\]]", u".",
        params['species_distribution_models'].get('species', u"Unknown"))
    # we need the layers from sdm to fetch correct files for climate_models
    # TODO: getdatasetparams should fetch 'layers'
    sdmobj = uuidToObject(uuid)
    sdmmd = IBCCVLMetadata(sdmobj)
    params['species_distribution_models']['layers'] = sdmmd.get('layers_used', None)
    # do future climate layers
    climatelist = []
    for uuid, layers in params['future_climate_datasets'].items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                'layer': layer,
                'zippath': dsinfo['layers'][layer]['filename'],
                # TODO: add year, gcm, emsc here?
                'type': dsinfo['layers'][layer]['datatype'],
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            climatelist.append(dsdata)
    # replace climate_models parameter
    params['future_climate_datasets'] = climatelist
    params['selected_models'] = 'all'
    # projection.name from dsinfo
    # FIXME: workaround to get future projection name back, but this works
    #        only for file naming scheme with current available data
    # NOTE(review): dsinfo here is whatever the *last* loop iteration left
    # behind — verify that is intended
    params['projection_name'], _ = os.path.splitext(dsinfo['filename'])
    # TODO: quick fix Decimal json encoding through celery (where is my
    #       custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add hints for worker
    workerhints = {
        'files': ('species_distribution_models', 'future_climate_datasets')
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def items(self):
    # return dict with keys for experiment
    # and subkey 'models' for models within experiment
    """Yield one dict per selected experiment, listing its model datasets
    with geographic-only algorithms filtered out.
    """
    if self.value:
        for experiment_uuid, model_uuids in self.value.items():
            item = {}
            expbrain = uuidToCatalogBrain(experiment_uuid)
            item['title'] = expbrain.Title
            item['uuid'] = expbrain.UID
            # TODO: what else wolud I need from an experiment?
            exp = expbrain.getObject()
            expmd = IBCCVLMetadata(exp)
            item['resolution'] = expmd.get('resolution')
            item['brain'] = expbrain
            # now search all models within and add infos
            pc = getToolByName(self.context, 'portal_catalog')
            brains = pc.searchResults(path=expbrain.getPath(),
                                      BCCDataGenre=self.genre)
            filtered_brains = []
            for brain in brains:
                # get algorithm term
                algoid = getattr(brain.getObject(), 'job_params', {}).get('function')
                # Filter out geographic models
                if algoid not in [
                        'circles', 'convhull', 'geoDist', 'geoIDW', 'voronoiHull'
                ]:
                    filtered_brains.append(brain)
            brains = filtered_brains
            # TODO: maybe as generator?
            item['subitems'] = [{
                'uuid': brain.UID,
                'title': brain.Title,
                'obj': brain.getObject(),
                'md': IBCCVLMetadata(brain.getObject()),
                'selected': brain.UID in self.value[experiment_uuid]
            } for brain in brains]
            yield item
def get_project_params(result): params = deepcopy(result.job_params) # get metadata for species_distribution_models uuid = params['species_distribution_models'] params['species_distribution_models'] = getdatasetparams(uuid) # do biomod name mangling of species name params['species_distribution_models']['species'] = re.sub( u"[ _]", u".", params['species_distribution_models'].get('species', u"Unknown")) # we need the layers from sdm to fetch correct files for climate_models # TODO: getdatasetparams should fetch 'layers' sdmobj = uuidToObject(uuid) sdmmd = IBCCVLMetadata(sdmobj) params['species_distribution_models']['layers'] = sdmmd.get( 'layers_used', None) # do future climate layers climatelist = [] for uuid, layers in params['future_climate_datasets'].items(): dsinfo = getdatasetparams(uuid) for layer in layers: dsdata = { 'uuid': dsinfo['uuid'], 'filename': dsinfo['filename'], 'downloadurl': dsinfo['downloadurl'], 'internalurl': dsinfo['internalurl'], 'layer': layer, 'zippath': dsinfo['layers'][layer]['filename'], # TODO: add year, gcm, emsc here? 'type': dsinfo['layers'][layer]['datatype'], } # if this is a zip file we'll have to set zippath as well # FIXME: poor check whether this is a zip file if dsinfo['filename'].endswith('.zip'): dsdata['zippath'] = dsinfo['layers'][layer]['filename'] climatelist.append(dsdata) # replace climate_models parameter params['future_climate_datasets'] = climatelist params['selected_models'] = 'all' # projection.name from dsinfo # FIXME: workaround to get future projection name back, but this works only for file naming scheme with current available data params['projection_name'], _ = os.path.splitext(dsinfo['filename']) # add hints for worker workerhints = { 'files': ('species_distribution_models', 'future_climate_datasets') } return {'env': {}, 'params': params, 'worker': workerhints}
def getRAT(self, datasetid, layer=None):
    """Fetch and decode the Raster Attribute Table for a dataset layer.

    :param datasetid: UID of the dataset to look up
    :param layer: layer identifier within the dataset metadata
    :returns: decoded RAT (from JSON) or None when absent/undecodable
    :raises NotFound: when no dataset matches datasetid
    """
    brains = dataset.query(self.context, brains=True, UID=datasetid)
    found = next(brains, None) if brains else None
    if not found:
        raise NotFound(self.context, datasetid, self.request)
    metadata = IBCCVLMetadata(found.getObject())
    raw = metadata.get('layers', {}).get(layer, {}).get('rat')
    if not raw:
        return raw
    # the RAT is stored as a JSON string; decode it, tolerating failures
    try:
        return json.loads(unicode(raw))
    except Exception as e:
        LOG.warning("Couldn't decode Raster Attribute Table from metadata. %s: %s",
                    self.context, repr(e))
        return None
def find_projections(ctx, emission_scenarios, climate_models, years, resolution=None):
    """Find Projection datasets for given criteria"""
    catalog = getToolByName(ctx, 'portal_catalog')
    query = {
        'BCCEmissionScenario': emission_scenarios,
        'BCCGlobalClimateModel': climate_models,
        'BCCDataGenre': 'DataGenreFC',
    }
    if resolution:
        query['BCCResolution'] = resolution
    matches = []
    for brain in catalog.searchResults(**query):
        md = IBCCVLMetadata(brain.getObject())
        # filter on the temporal metadata matching one of the wanted years
        if md.get('temporal', None) in years:
            # TODO: yield?
            matches.append(brain)
    return matches
def get_project_params(result): params = deepcopy(result.job_params) # get metadata for species_distribution_models uuid = params["species_distribution_models"] params["species_distribution_models"] = getdatasetparams(uuid) # do biomod name mangling of species name params["species_distribution_models"]["species"] = re.sub( u"[ _'\"/\(\)\{\}\[\]]", u".", params["species_distribution_models"].get("species", u"Unknown") ) # we need the layers from sdm to fetch correct files for climate_models # TODO: getdatasetparams should fetch 'layers' sdmobj = uuidToObject(uuid) sdmmd = IBCCVLMetadata(sdmobj) params["species_distribution_models"]["layers"] = sdmmd.get("layers_used", None) # do future climate layers climatelist = [] for uuid, layers in params["future_climate_datasets"].items(): dsinfo = getdatasetparams(uuid) for layer in layers: dsdata = { "uuid": dsinfo["uuid"], "filename": dsinfo["filename"], "downloadurl": dsinfo["downloadurl"], "internalurl": dsinfo["internalurl"], "layer": layer, "zippath": dsinfo["layers"][layer]["filename"], # TODO: add year, gcm, emsc here? "type": dsinfo["layers"][layer]["datatype"], } # if this is a zip file we'll have to set zippath as well # FIXME: poor check whether this is a zip file if dsinfo["filename"].endswith(".zip"): dsdata["zippath"] = dsinfo["layers"][layer]["filename"] climatelist.append(dsdata) # replace climate_models parameter params["future_climate_datasets"] = climatelist params["selected_models"] = "all" # projection.name from dsinfo # FIXME: workaround to get future projection name back, but this works only for file naming scheme with current available data params["projection_name"], _ = os.path.splitext(dsinfo["filename"]) # add hints for worker workerhints = {"files": ("species_distribution_models", "future_climate_datasets")} return {"env": {}, "params": params, "worker": workerhints}
def items(self):
    # return dict with keys for experiment
    # and subkey 'models' for models within experiment
    """Yield one dict per selected experiment, listing its model datasets
    with geographic-only algorithms filtered out.
    """
    if self.value:
        for experiment_uuid, model_uuids in self.value.items():
            item = {}
            expbrain = uuidToCatalogBrain(experiment_uuid)
            item['title'] = expbrain.Title
            item['uuid'] = expbrain.UID
            # TODO: what else wolud I need from an experiment?
            exp = expbrain.getObject()
            expmd = IBCCVLMetadata(exp)
            item['resolution'] = expmd.get('resolution')
            item['brain'] = expbrain
            # now search all models within and add infos
            pc = getToolByName(self.context, 'portal_catalog')
            brains = pc.searchResults(path=expbrain.getPath(),
                                      BCCDataGenre=self.genre)
            filtered_brains = []
            for brain in brains:
                # get algorithm term
                algoid = getattr(brain.getObject(), 'job_params', {}).get('function')
                # Filter out geographic models
                if algoid not in ['circles', 'convhull', 'geoDist',
                                  'geoIDW', 'voronoiHull']:
                    filtered_brains.append(brain)
            brains = filtered_brains
            # TODO: maybe as generator?
            item['subitems'] = [{'uuid': brain.UID,
                                 'title': brain.Title,
                                 'obj': brain.getObject(),
                                 'md': IBCCVLMetadata(brain.getObject()),
                                 'selected': brain.UID in self.value[experiment_uuid]}
                                for brain in brains]
            yield item
def __generateParameters(self, params, portal_type):
    # This code formats the input parameters to experiments, and is a mirror
    # "copy" of get_sdm_params, get_project_params, get_biodiverse_params,
    # get_traits_params, get_ensemble_params in org.bccvl.compute.
    """Expand experiment job parameters into the JSON input document for a
    compute job.

    :param params: raw job parameters (dataset uuids etc.)
    :param portal_type: portal_type of the experiment content object
    :returns: JSON string with 'env' and 'params' sections
    """
    inp = deepcopy(params)
    for key, val in inp.items():
        if key in ('modelling_region', 'projection_region'):
            if val:
                val = params[key].data
            else:
                # no explicit region — point the worker at the API endpoint
                val = '{0}/API/em/v1/constraintregion?uuid={1}'.format(
                    getSite().absolute_url(), IUUID(self.context))
        if key in ('species_occurrence_dataset', 'species_absence_dataset'):
            if val:
                val = getdatasetparams(val)
                # biomod-style species name mangling
                val['species'] = re.sub(u"[ _,\-'\"/\(\)\{\}\[\]]", u".",
                                        val.get('species', u'Unknown'))
        if key in ('environmental_datasets', 'future_climate_datasets'):
            envlist = []
            for uuid, layers in val.items():
                dsinfo = getdatasetparams(uuid)
                for layer in layers:
                    dsdata = {
                        'uuid': dsinfo['uuid'],
                        'filename': dsinfo['filename'],
                        'downloadurl': dsinfo['downloadurl'],
                        # TODO: should we use layer title or URI?
                        'layer': layer,
                        'type': dsinfo['layers'][layer]['datatype']
                    }
                    # if this is a zip file we'll have to set zippath as well
                    # FIXME: poor check whether this is a zip file
                    if dsinfo['filename'].endswith('.zip'):
                        dsdata['zippath'] = dsinfo['layers'][layer]['filename']
                    envlist.append(dsdata)
            val = envlist
        # for SDM model as input to Climate Change experiement
        if key == 'species_distribution_models':
            if val:
                uuid = val
                val = getdatasetparams(uuid)
                val['species'] = re.sub(u"[ _\-'\"/\(\)\{\}\[\]]", u".",
                                        val.get('species', u"Unknown"))
                sdmobj = uuidToObject(uuid)
                sdmmd = IBCCVLMetadata(sdmobj)
                val['layers'] = sdmmd.get('layers_used', None)
                # do SDM projection results
                sdm_projections = []
                for resuuid in inp['sdm_projections']:
                    sdm_projections.append(getdatasetparams(resuuid))
                inp['sdm_projections'] = sdm_projections
        # for projection as input to Biodiverse experiment
        if key == 'projections':
            dslist = []
            for dsparam in val:
                dsinfo = getdatasetparams(dsparam['dataset'])
                dsinfo['threshold'] = dsparam['threshold']
                # Convert threshold value from Decimal to float
                for thkey, thvalue in dsinfo['threshold'].items():
                    if isinstance(thvalue, Decimal):
                        dsinfo['threshold'][thkey] = float(thvalue)
                dslist.append(dsinfo)
            # replace projections param
            val = dslist
        # projection models as input to Ensemble experiment
        if key == 'datasets':
            dslist = []
            for uuid in val:
                dslist.append(getdatasetparams(uuid))
            # replace datasets param
            val = dslist
        # for trait dataset as input to Species Trait Modelling experiment
        if key == 'traits_dataset':
            dsinfo = getdatasetparams(val)
            if dsinfo['filename'].endswith('.zip'):
                dsinfo['zippath'] = dsinfo['layers'].values()[0]['filename']
            val = dsinfo
        if isinstance(val, Decimal):
            val = float(val)
        inp[key] = val
    # FIX: was `portal_type == (tuple...)` which compares a string against
    # a tuple and is always False, so the SDM defaults were never added;
    # membership test is what was intended.
    if portal_type in ('org.bccvl.content.sdmexperiment',
                       'org.bccvl.content.msdmexperiment',
                       'org.bccvl.content.mmexperiment'):
        inp.update({
            'rescale_all_models': False,
            'selected_models': 'all',
            'modeling_id': 'bccvl',
            # generic dismo params
            'tails': 'both',
        })
    elif portal_type == 'org.bccvl.content.projectionexperiment':
        inp.update({
            'selected_models': 'all',
            # NOTE(review): dsinfo is whatever the last loop iteration left
            # behind — verify this is the intended dataset
            'projection_name': os.path.splitext(dsinfo['filename'])[0]
        })
    inputParams = {
        # example of input/ouput directories
        'env': {
            'inputdir': './input',
            'outputdir': './output',
            'scriptdir': './script',
            'workdir': './workdir'
        },
        'params': inp
    }
    return json.dumps(inputParams, default=str, indent=4)
def _createProvenance(self, result):
    """Build a W3C PROV graph describing this job run and store it.

    Records the current user and the job script as PROV agents, the job
    itself as an activity (with start time and parameters), and every
    input dataset referenced by ``data_table`` as an entity used by the
    activity.  The serialized turtle graph is written to
    ``IProvenanceData(result).data``.

    :param result: the experiment result object carrying ``job_params``.
    """
    provdata = IProvenanceData(result)
    from rdflib import URIRef, Literal, Namespace, Graph
    from rdflib.namespace import RDF, RDFS, FOAF, DCTERMS, XSD
    from rdflib.resource import Resource
    PROV = Namespace(u"http://www.w3.org/ns/prov#")
    BCCVL = Namespace(u"http://ns.bccvl.org.au/")
    LOCAL = Namespace(u"urn:bccvl:")
    graph = Graph()
    # the user is our agent
    member = api.user.get_current()
    username = member.getProperty('fullname') or member.getId()
    user = Resource(graph, LOCAL['user'])
    user.add(RDF['type'], PROV['Agent'])
    user.add(RDF['type'], FOAF['Person'])
    user.add(FOAF['name'], Literal(username))
    user.add(FOAF['mbox'],
             URIRef('mailto:{}'.format(member.getProperty('email'))))
    # add software as agent
    software = Resource(graph, LOCAL['software'])
    software.add(RDF['type'], PROV['Agent'])
    software.add(RDF['type'], PROV['SoftwareAgent'])
    software.add(FOAF['name'], Literal('BCCVL Job Script'))
    # script content is stored somewhere on result and will be exported with zip?
    # ... or store along with pstats.json ? hidden from user
    # -> execenvironment after import -> log output?
    # -> source code ... maybe some link expression? stored on result ? separate entity?
    activity = Resource(graph, LOCAL['activity'])
    activity.add(RDF['type'], PROV['Activity'])
    # TODO: this is rather queued or created time for this activity ...
    #       could capture real start time on running status update (or start transfer)
    now = datetime.now().replace(microsecond=0)
    activity.add(PROV['startedAtTime'],
                 Literal(now.isoformat(), datatype=XSD['dateTime']))
    activity.add(PROV['hasAssociationWith'], user)
    activity.add(PROV['hasAssociationWith'], software)
    # add job parameters to activity
    for idx, (key, value) in enumerate(result.job_params.items()):
        param = Resource(graph, LOCAL[u'param_{}'.format(idx)])
        activity.add(BCCVL['algoparam'], param)
        param.add(BCCVL['name'], Literal(key))
        # We have only dataset references as parameters
        if key in ('data_table', ):
            # BUGFIX: was ``LOCAL[dsuuid]`` — ``dsuuid`` is not defined in
            # this scope and raised NameError. The dataset uuid is the
            # parameter *value*, which matches the entity node created
            # below as ``LOCAL[result.job_params[key]]``.
            param.add(BCCVL['value'], LOCAL[value])
        else:
            param.add(BCCVL['value'], Literal(value))
    # iterate over all input datasets and add them as entities
    for key in ('data_table', ):
        dsbrain = uuidToCatalogBrain(result.job_params[key])
        if not dsbrain:
            # TODO: should inform user about missing dataset?
            continue
        ds = dsbrain.getObject()
        dsprov = Resource(graph, LOCAL[result.job_params[key]])
        dsprov.add(RDF['type'], PROV['Entity'])
        # dsprov.add(PROV['..'], Literal(''))
        dsprov.add(DCTERMS['creator'], Literal(ds.Creator()))
        dsprov.add(DCTERMS['title'], Literal(ds.title))
        dsprov.add(DCTERMS['description'], Literal(ds.description))
        dsprov.add(DCTERMS['rights'], Literal(ds.rights))  # ds.rightsstatement
        dsprov.add(DCTERMS['format'], Literal(ds.file.contentType))
        # location / source
        # graph.add(uri, DCTERMS['source'], Literal(''))
        # TODO: genre ...
        # TODO: resolution
        # species metadata
        md = IBCCVLMetadata(ds)
        # dsprov.add(BCCVL['scientificName'], Literal(md['species']['scientificName']))
        # dsprov.add(BCCVL['taxonID'], URIRef(md['species']['taxonID']))
        # ... species data, ... species id
        for layer in md.get('layers_used', ()):
            dsprov.add(BCCVL['layer'], LOCAL[layer])
        # link with activity
        activity.add(PROV['used'], dsprov)

    provdata.data = graph.serialize(format="turtle")
def __createExpmetadata(self, job_params):
    """Assemble the experiment metadata dict (``self.md``) and render it.

    Collects model specification, all input datasets (species data,
    pseudo-absences, environmental/future-climate layers, upstream
    model/projection results) and algorithm settings from ``job_params``
    and the experiment context, then renders the sections to text via
    ``self.__getMetadataText``.

    :param job_params: dict of job parameters for this result.
    :return: the full metadata document as a string.
    """
    # To do: add other R package versions dynamically
    # Get experiment title
    self.md['Model specifications'] = {
        'Title': self.context.title,
        'Date/time run': self.context.creation_date.__str__(),
        'Description': self.context.description or ''
    }

    # iterate over all input datasets and add them as entities
    self.md['Input datasets:'] = {}
    for key in ('species_occurrence_dataset', 'species_absence_dataset',
                'traits_dataset'):
        spmd = {}
        # NOTE: has_key is Python-2-only dict API
        if not job_params.has_key(key):
            continue
        dsbrain = uuidToCatalogBrain(job_params[key])
        if not dsbrain:
            continue
        ds = dsbrain.getObject()
        mdata = IBCCVLMetadata(ds)
        # include row count in the title when available
        if mdata and mdata.get('rows', None):
            spmd = {'Title': "{} ({})".format(ds.title, mdata.get('rows'))}
        else:
            spmd = {'Title': ds.title}
        info = IDownloadInfo(ds)
        spmd['Download URL'] = info['url']
        # walk up to the containing collection (or site root) for
        # fallback description / attribution
        coll = ds
        while not (ISiteRoot.providedBy(coll)
                   or ICollection.providedBy(coll)):
            coll = coll.__parent__
        spmd['Description'] = ds.description or coll.description or ''
        # NOTE(review): getattr without a default raises AttributeError if
        # the collection has no 'attribution' attribute — confirm intended
        attribution = ds.attribution or getattr(coll, 'attribution') or ''
        if isinstance(attribution, list):
            attribution = '\n'.join([att.raw for att in attribution])
        spmd['Attribution'] = attribution
        self.md['Input datasets:'][key] = spmd

    key = 'traits_dataset_params'
    if key in job_params:
        self.md['Input datasets:'][key] = job_params.get(key, {})

    # pseudo-absence metadata.
    key = u"pseudo_absence_dataset"
    pa_file = self.context.get('pseudo_absences.csv')
    pa_url = ""
    pa_title = ""
    if pa_file:
        pa_title = pa_file.title
        pa_url = pa_file.absolute_url()
        pa_url = '{}/@@download/{}'.format(
            pa_url, os.path.basename(pa_url))
    pamd = {
        'Title': pa_title,
        'Download URL': pa_url,
        'Pseudo-absence Strategy': job_params.get('pa_strategy', ''),
        'Pseudo-absence Ratio': str(job_params.get('pa_ratio', ''))
    }
    # strategy-specific parameters
    if job_params.get('pa_strategy', '') == 'disc':
        pamd['Minimum distance'] = str(job_params.get('pa_disk_min', ''))
        pamd['Maximum distance'] = str(job_params.get('pa_disk_max', ''))
    if job_params.get('pa_strategy', '') == 'sre':
        pamd['Quantile'] = str(job_params.get('pa_sre_quant', ''))
    self.md['Input datasets:'][key] = pamd

    for key in ['environmental_datasets', 'future_climate_datasets']:
        if key not in job_params:
            continue
        env_list = []
        layer_vocab = getUtility(
            IVocabularyFactory, 'layer_source')(self.context)
        for uuid, layers in job_params[key].items():
            ds = uuidToObject(uuid)
            # walk up to the containing collection for fallbacks
            coll = ds
            while not (ISiteRoot.providedBy(coll)
                       or ICollection.providedBy(coll)):
                coll = coll.__parent__
            description = ds.description or coll.description
            # NOTE(review): same unguarded getattr as above
            attribution = ds.attribution or getattr(coll, 'attribution') or ''
            if isinstance(attribution, list):
                attribution = '\n'.join([att.raw for att in attribution])
            layer_titles = [layer_vocab.getLayerTitle(layer)
                            for layer in layers]
            env_list.append({
                'Title': ds.title,
                'Layers': u'\n'.join(layer_titles),
                'Description': description,
                'Attribution': attribution
            })
        self.md['Input datasets:'][key] = env_list

    key = "datasets"
    if key in job_params:
        dataset_list = []
        for uid in job_params[key]:
            dsbrain = uuidToCatalogBrain(uid)
            if dsbrain:
                ds = dsbrain.getObject()
                # get the source experiment
                source_exp = ds.__parent__
                while not IExperiment.providedBy(source_exp):
                    source_exp = source_exp.__parent__
                dataset_list.append({
                    'Source experiment': source_exp.title,
                    'Title': ds.title,
                    'Description': ds.description,
                    'Download URL': '{}/@@download/file/{}'.format(
                        ds.absolute_url(),
                        os.path.basename(ds.absolute_url())),
                    'Algorithm': ds.__parent__.job_params.get('function', ''),
                    'Species': IBCCVLMetadata(ds).get(
                        'species', {}).get('scientificName', ''),
                    'Resolution': IBCCVLMetadata(ds).get('resolution', '')
                })
        self.md['Input datasets:'][key] = dataset_list

    key = 'species_distribution_models'
    if key in job_params:
        dsbrain = uuidToCatalogBrain(job_params[key])
        if dsbrain:
            ds = dsbrain.getObject()
            # get the source experiment
            source_exp = ds.__parent__
            while not IExperiment.providedBy(source_exp):
                source_exp = source_exp.__parent__
            # get the threshold chosen for this model in this experiment
            threshold = self.context.species_distribution_models.get(
                source_exp.UID(), {}).get(ds.UID())
            self.md['Input datasets:'][key] = {
                'Source experiment': source_exp.title,
                'Title': ds.title,
                'Description': ds.description,
                'Download URL': '{}/@@download/file/{}'.format(
                    ds.absolute_url(),
                    os.path.basename(ds.absolute_url())),
                'Algorithm': ds.__parent__.job_params.get('function', ''),
                'Species': IBCCVLMetadata(ds).get(
                    'species', {}).get('scientificName', ''),
                'Threshold': "{}({})".format(
                    threshold.get('label', ''),
                    str(threshold.get('value', '')))
            }

    key = 'projections'
    if key in job_params:
        for pds in job_params[key]:
            threshold = pds.get('threshold', {})
            dsbrain = uuidToCatalogBrain(pds.get('dataset'))
            if dsbrain:
                ds = dsbrain.getObject()
                # get the source experiment
                source_exp = ds.__parent__
                while not IExperiment.providedBy(source_exp):
                    source_exp = source_exp.__parent__
                # NOTE(review): this assignment is inside the loop, so only
                # the *last* projection dataset survives in the metadata —
                # compare the 'datasets' branch above which builds a list.
                # Confirm whether a single entry is intended.
                self.md['Input datasets:'][key] = {
                    'Source experiment': source_exp.title,
                    'Title': ds.title,
                    'Description': ds.description,
                    'Download URL': '{}/@@download/file/{}'.format(
                        ds.absolute_url(),
                        os.path.basename(ds.absolute_url())),
                    'Algorithm': ds.__parent__.job_params.get('function', ''),
                    'Species': IBCCVLMetadata(ds).get(
                        'species', {}).get('scientificName', ''),
                    'Threshold': "{}({})".format(
                        threshold.get('label', ''),
                        str(threshold.get('value', ''))),
                    'Biodiverse Cell size (m)': str(
                        job_params.get('cluster_size', ''))
                }

    # Projection experiment does not have algorithm as input
    if not IProjectionExperiment.providedBy(self.context.__parent__):
        for key in ['function', 'algorithm']:
            if key in job_params:
                self.md['Algorithm settings:'] = {
                    'Algorithm Name': job_params[key],
                    'Configuration options': self.__algoConfigOption(
                        job_params[key], job_params)
                }

    # Construct the text
    mdtext = StringIO.StringIO()
    for heading in ['BCCVL model outputs guide',
                    'System specifications',
                    'Model specifications',
                    'Input datasets:',
                    'Algorithm settings:',
                    'Model outputs:']:
        mdtext.write(self.__getMetadataText(heading, self.md))
    return mdtext.getvalue()
def get_project_params(result):
    """Build the parameter dict sent to the worker for a projection job.

    Starts from a deep copy of ``result.job_params`` and resolves every
    dataset uuid into a full dataset-info dict (via ``getdatasetparams``),
    flattens the future climate datasets into a per-layer list, and adds
    the constraint region plus worker file hints.

    :param result: result object holding ``job_params``.
    :return: ``{'env': {}, 'params': ..., 'worker': ...}`` ready for dispatch.
    """
    params = deepcopy(result.job_params)
    # get metadata for species_distribution_models
    uuid = params['species_distribution_models']
    params['species_distribution_models'] = getdatasetparams(uuid)
    # do biomod name mangling of species name
    params['species_distribution_models']['species'] = re.sub(
        u"[ _\-'\"/\(\)\{\}\[\]]", u".",
        params['species_distribution_models'].get('species', u"Unknown"))
    # we need the layers from sdm to fetch correct files for climate_models
    # TODO: getdatasetparams should fetch 'layers'
    sdmobj = uuidToObject(uuid)
    sdmmd = IBCCVLMetadata(sdmobj)
    params['species_distribution_models']['layers'] = sdmmd.get(
        'layers_used', None)

    # do SDM projection results
    sdm_projections = []
    for resuuid in params['sdm_projections']:
        sdm_projections.append(getdatasetparams(resuuid))
    params['sdm_projections'] = sdm_projections

    # do future climate layers
    climatelist = []
    # FIX: tolerate a missing 'selected_future_layers' key (was a direct
    # subscript that could raise KeyError)
    selected_future_layers = params.get('selected_future_layers')
    # track the last dataset info for the projection-name fallback below
    dsinfo = None
    for uuid, layers in params['future_climate_datasets'].items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                'layer': layer,
                # TODO: add year, gcm, emsc here?
                'type': dsinfo['layers'][layer]['datatype'],
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            # FIXME: workaround to get future projection name back, but this
            #        works only for file naming scheme with current available data
            if selected_future_layers and layer in selected_future_layers:
                params['projection_name'], _ = os.path.splitext(
                    dsinfo['filename'])
            climatelist.append(dsdata)
    # replace climate_models parameter
    params['future_climate_datasets'] = climatelist
    params['selected_models'] = 'all'
    # In case no future climate layer is selected
    # FIX: guard dsinfo — with no future climate datasets the original code
    # raised NameError here; now the name is simply left unset
    if not params.get('projection_name') and dsinfo is not None:
        params['projection_name'], _ = os.path.splitext(dsinfo['filename'])

    # TODO: quick fix Decimal json encoding through celery
    #       (where is my custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)

    # Get the content of the projection_region BlobFile.
    # Note: deepcopy does not copy the content of BlobFile.
    params['projection_region'] = {
        'uuid': IUUID(result),
        'filename': 'projection_region.json',
        'downloadurl': '{0}/API/em/v1/constraintregion?uuid={1}'.format(
            getSite().absolute_url(), IUUID(result)),
    }
    # add hints for worker
    workerhints = {
        'files': ('species_distribution_models', 'future_climate_datasets',
                  'sdm_projections', 'projection_region',)
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def export_to_ala(self):
    """Upload a species dataset to the ALA sandbox service.

    Resolves the dataset from the ``uuid`` request parameter (falling back
    to ``self.context``), validates content type and data genre, downloads
    the file to a temp dir via movelib, and POSTs the CSV to the ALA
    sandbox URL configured in the celery app settings.

    :return: the sandbox service's JSON response dict.
    :raises Exception: on any validation or upload failure (also recorded
        via ``self.record_error`` with status 500).
    """
    uuid = self.request.form.get('uuid', None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context

        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                'id': member.getUserName(),
                'email': member.getProperty('email'),
                'fullname': member.getProperty('fullname')
            }
        else:
            raise Exception("Invalid user")

        # verify dataset
        if obj.portal_type not in (
                'org.bccvl.content.dataset',
                'org.bccvl.content.remotedataset',
                'org.bccvl.content.multispeciesdataset'):
            raise Exception("Invalid UUID (content type)")
        md = IBCCVLMetadata(obj)
        # NOTE(review): this copy accepts 'DataGenreSpeciesCollection';
        # another copy of this method in this file does not — confirm
        # which genre list is authoritative.
        if md.get('genre') not in ('DataGenreSpeciesOccurrence',
                                   'DataGenreSpeciesCollection',
                                   'DataGenreTraits'):
            raise Exception("Invalid UUID (data type)")
        # get download url
        dlinfo = IDownloadInfo(obj)

        # download file
        from org.bccvl import movelib
        from org.bccvl.movelib.utils import build_source, build_destination
        import tempfile
        destdir = tempfile.mkdtemp(prefix='export_to_ala')
        try:
            from org.bccvl.tasks.celery import app
            settings = app.conf.get('bccvl', {})
            dest = os.path.join(destdir, os.path.basename(dlinfo['url']))
            movelib.move(build_source(dlinfo['url'], user['id'], settings),
                         build_destination('file://{}'.format(dest)))
            csvfile = None
            if dlinfo['contenttype'] == 'application/zip':
                # look at 'layers' to find file within zip
                arc = md['layers'].keys()[0]
                import zipfile
                zf = zipfile.ZipFile(dest, 'r')
                csvfile = zf.open(arc, 'r')
            else:
                csvfile = open(dest, 'rb')
            import requests
            # "Accept:application/json" "Origin:http://example.com"
            res = requests.post(settings['ala']['sandboxurl'],
                                files={'file': csvfile},
                                headers={
                                    'apikey': settings['ala']['apikey'],
                                    'Accept': 'application/json'
                                })
            if res.status_code != 200:
                self.record_error(res.reason, res.status_code)
                raise Exception('Upload failed')
            retval = res.json()
            # TODO: do error checking
            #       keys: sandboxUrl, fileName, message, error: Bool, fileId
            return retval
        finally:
            # always clean up the temp download dir
            import shutil
            shutil.rmtree(destdir)
    except Exception as e:
        self.record_error(str(e), 500)
        raise
def item(self):
    """Return a dict describing the selected experiment and its models.

    Top-level keys: title, uuid, layers, resolution; sub-key 'subitems'
    lists the completed model results (one per result folder) with their
    algorithm and threshold information.  ``self.value`` maps an
    experiment uuid to a dict of selected model uuids.
    """
    item = {}
    if self.value:
        experiment_uuid = self.value.keys()[0]
        expbrain = uuidToCatalogBrain(experiment_uuid)
        if expbrain is None:
            # experiment has been deleted or is inaccessible
            return {
                'title': u'Not Available',
                'uuid': experiment_uuid,
                'subitems': []  # models
            }
        item['title'] = expbrain.Title
        item['uuid'] = expbrain.UID
        exp = expbrain.getObject()
        # TODO: To get layers of all subsets?
        if getattr(exp, 'datasubsets', None):
            env_datasets = exp.datasubsets[0].get('environmental_datasets')
            item['layers'] = set(chain(*env_datasets.values()))
        else:
            item['layers'] = set(chain(*exp.environmental_datasets.values()))
        expmd = IBCCVLMetadata(exp)
        item['resolution'] = expmd.get('resolution')
        # now search all models within and add infos
        pc = getToolByName(self.context, 'portal_catalog')
        # only get result folders that are completed
        brains = pc.searchResults(path={'query': expbrain.getPath(),
                                        'depth': 1},
                                  portal_type='Folder',
                                  job_state='COMPLETED')
        # TODO: maybe as generator?
        item['subitems'] = []
        for fldbrain in brains:
            # Get the SDM model from result folder
            brain = pc.searchResults(path=fldbrain.getPath(),
                                     BCCDataGenre=self.genre)
            if not brain:
                # ignore this folder, as it does not have a result we want
                continue
            brain = brain[0]
            # get algorithm term
            algoid = getattr(brain.getObject(), 'job_params',
                             {}).get('function')
            algobrain = self.algo_dict.get(algoid, None)
            # FIX: algobrain may be None for an unknown algorithm id; the
            # original dereferenced it unconditionally and raised
            # AttributeError. Skip such results like other filtered models.
            if algobrain is None:
                continue
            # Filter out geographic models
            if algobrain.getObject().algorithm_category == 'geographic':
                continue
            # FIXME: I need a different list of thresholds for display;
            #        esp. don't look up threshold, but take vales (threshold
            #        id and value) from field as is
            thresholds = dataset.getThresholds(brain.UID)[brain.UID]
            threshold = self.value[experiment_uuid].get(brain.UID)
            # is threshold in list?
            if threshold and threshold['label'] not in thresholds:
                # maybe a custom entered number?
                # ... I guess we don't really care as long as we produce
                # the same the user entered. (validate?)
                thresholds[threshold['label']] = threshold['label']
            # current projection tiff file, and its metadata
            cpbrains = pc.searchResults(path=expbrain.getPath(),
                                        BCCDataGenre=['DataGenreCP'])
            cpmd = IBCCVLMetadata(cpbrains[0].getObject())
            item['subitems'].append({
                'item': brain,
                'uuid': brain.UID,
                'title': brain.Title,
                'selected': brain.UID in self.value[experiment_uuid],
                'algorithm': algobrain,
                'threshold': threshold,
                'thresholds': thresholds,
                'layermd': cpmd['layers'].values()[0]
            })
    return item
def getProjectionDatasets(self):
    """Collect all projection result datasets grouped by experiment.

    Queries the catalog for projection and SDM experiments, then for each
    experiment gathers its current/future projection datasets
    (BCCDataGenre 'DataGenreCP'/'DataGenreFP') with year, gcm, emsc,
    species and source-SDM info, plus per-experiment aggregates.

    :return: ``{'projections': [...]}`` — one entry per experiment.
    """
    pc = getToolByName(self.context, 'portal_catalog')
    # to make it easier to produce required structure do separate queries
    # 1st query for all projection experiments
    projbrains = pc.searchResults(
        object_provides=(IProjectionExperiment.__identifier__,
                         ISDMExperiment.__identifier__),
        sort_on='sortable_title')  # date?
    # the list to collect results
    projections = []
    for projbrain in projbrains:
        # get all result datasets from experiment and build list
        datasets = []
        agg_species = set()
        agg_years = set()
        for dsbrain in pc.searchResults(
                path=projbrain.getPath(),
                BCCDataGenre=('DataGenreFP', 'DataGenreCP')):
            # get year, gcm, emsc, species, filename/title, fileuuid
            # TODO: Result is one file per species ... should this be a
            #       dict by species or year as well?
            ds = dsbrain.getObject()
            md = IBCCVLMetadata(ds)
            # parse year out of the temporal extent string
            period = md.get('temporal')
            year = Period(period).start if period else None
            species = md.get('species', {}).get('scientificName')
            dsinfo = {
                # possible fields on brain:
                #   Description, BCCResolution
                # ds.file.contentType
                # TODO: restructure ... tile, filename no list
                "title": dsbrain.Title,
                "uuid": dsbrain.UID,
                "files": [ds.file.filename],  # filenames
                "year": year,  # int or string?
                "gcm": md.get('gcm'),  # URI? title? both? -> ui can fetch vocab to get titles
                "emsc": md.get('emsc'),
                "species": species,
                "resolution": dsbrain.BCCResolution,
            }
            # add info about sdm
            if 'DataGenreCP' in dsbrain.BCCDataGenre:
                sdmresult = ds.__parent__
                # sdm = .... it's the model as sibling to this current
                #            projection ds
                sdm = ds  # FIXME: wrong object here (known issue — the
                #           model sibling should be used, not the
                #           projection dataset itself)
                dsinfo['type'] = u"Current"
            else:
                sdmuuid = ds.__parent__.job_params[
                    'species_distribution_models']
                # NOTE(review): assumes the sdm uuid still resolves —
                # uuidToCatalogBrain(...) would raise AttributeError if not
                sdm = uuidToCatalogBrain(sdmuuid).getObject()
                sdmresult = sdm.__parent__
                dsinfo['type'] = u"Future"
            sdmexp = sdmresult.__parent__
            dsinfo['sdm'] = {
                'title': sdmexp.title,
                'algorithm': sdmresult.job_params['function'],
                'url': sdm.absolute_url()
            }
            datasets.append(dsinfo)
            agg_species.add(species)
            agg_years.add(year)
        # TODO: could also aggregate all data on projections result:
        #       e.g. list all years, gcms, emscs, aggregated from datasets
        projections.append({
            "name": projbrain.Title,  # TODO: rename to title
            "uuid": projbrain.UID,  # TODO: rename to uuid
            "species": tuple(agg_species),
            "years": tuple(agg_years),
            "result": datasets
        })
    # wrap in projections necessary?
    return {'projections': projections}
def headers(obj, **kw): md = IBCCVLMetadata(obj) return md.get('headers', None)
def export_to_ala(self):
    """Upload a species dataset to the ALA sandbox service.

    Resolves the dataset from the ``uuid`` request parameter (falling back
    to ``self.context``), validates content type and data genre, downloads
    the file to a temp dir via movelib, and POSTs the CSV to the ALA
    sandbox URL configured in the celery app settings.

    :return: the sandbox service's JSON response dict.
    :raises Exception: on any validation or upload failure (also recorded
        via ``self.record_error`` with status 500).
    """
    uuid = self.request.form.get("uuid", None)
    try:
        if uuid:
            brain = uuidToCatalogBrain(uuid)
            if brain is None:
                raise Exception("Brain not found")
            obj = brain.getObject()
        else:
            obj = self.context

        # get username
        member = ploneapi.user.get_current()
        if member.getId():
            user = {
                "id": member.getUserName(),
                "email": member.getProperty("email"),
                "fullname": member.getProperty("fullname"),
            }
        else:
            raise Exception("Invalid user")

        # verify dataset
        if obj.portal_type not in (
            "org.bccvl.content.dataset",
            "org.bccvl.content.remotedataset",
            "org.bccvl.content.multispeciesdataset",
        ):
            raise Exception("Invalid UUID (content type)")
        md = IBCCVLMetadata(obj)
        # NOTE(review): this copy rejects 'DataGenreSpeciesCollection';
        # another copy of this method in this file accepts it — confirm
        # which genre list is authoritative.
        if md.get("genre") not in ("DataGenreSpeciesOccurrence",
                                   "DataGenreTraits"):
            raise Exception("Invalid UUID (data type)")
        # get download url
        dlinfo = IDownloadInfo(obj)

        # download file
        from org.bccvl import movelib
        from org.bccvl.movelib.utils import build_source, build_destination
        import tempfile

        destdir = tempfile.mkdtemp(prefix="export_to_ala")
        try:
            from org.bccvl.tasks.celery import app

            settings = app.conf.get("bccvl", {})
            dest = os.path.join(destdir, os.path.basename(dlinfo["url"]))
            movelib.move(
                build_source(dlinfo["url"], user["id"], settings),
                build_destination("file://{}".format(dest))
            )
            csvfile = None
            if dlinfo["contenttype"] == "application/zip":
                # look at 'layers' to find file within zip
                arc = md["layers"].keys()[0]
                import zipfile

                zf = zipfile.ZipFile(dest, "r")
                csvfile = zf.open(arc, "r")
            else:
                csvfile = open(dest, "rb")
            import requests

            # "Accept:application/json" "Origin:http://example.com"
            res = requests.post(
                settings["ala"]["sandboxurl"],
                files={"file": csvfile},
                headers={"apikey": settings["ala"]["apikey"],
                         "Accept": "application/json"},
            )
            if res.status_code != 200:
                self.record_error(res.reason, res.status_code)
                raise Exception("Upload failed")
            retval = res.json()
            # TODO: do error checking
            #       keys: sandboxUrl, fileName, message, error: Bool, fileId
            return retval
        finally:
            # always clean up the temp download dir
            import shutil

            shutil.rmtree(destdir)
    except Exception as e:
        self.record_error(str(e), 500)
        raise
def month(obj, **kw): # FIXME: see year indexer above md = IBCCVLMetadata(obj) return md.get('month', None)
def upgrade_190_200_1(context, logger=None):
    """Upgrade step 1.9.0 -> 2.0.0.

    Re-runs GS import steps, converts dataset Folders to collections,
    backfills 'categories' and 'year' metadata on datasets, removes
    Fresnel vocabulary utilities, migrates OAuth1 registry settings to
    Figshare settings, and strips legacy toolkit attributes.

    :param context: portal_setup context for this upgrade.
    :param logger: optional logger; defaults to module LOG.
    """
    if logger is None:
        logger = LOG
    # Run GS steps
    portal = api.portal.get()
    setup = getToolByName(context, 'portal_setup')
    setup.runImportStepFromProfile(PROFILE_ID, 'typeinfo')
    setup.runImportStepFromProfile(PROFILE_ID, 'plone.app.registry')
    setup.runImportStepFromProfile(PROFILE_ID, 'properties')
    setup.runImportStepFromProfile(PROFILE_ID, 'catalog')
    setup.runImportStepFromProfile(PROFILE_ID, 'propertiestool')
    setup.runImportStepFromProfile(PROFILE_ID, 'actions')
    setup.runImportStepFromProfile(PROFILE_ID, 'workflow')
    # set portal_type of all collections to 'org.bccvl.content.collection'
    for tlf in portal.datasets.values():
        for coll in tlf.values():
            if coll.portal_type == 'Folder':
                coll.portal_type = 'org.bccvl.content.collection'
    setup.runImportStepFromProfile(PROFILE_ID, 'org.bccvl.site.content')
    # rebuild the catalog to make sure new indices are populated
    logger.info("rebuilding catalog")
    pc = getToolByName(context, 'portal_catalog')
    pc.reindexIndex('BCCCategory', None)
    # add category to existing species data
    genre_map = {
        'DataGenreSpeciesOccurrence': 'occurrence',
        'DataGenreSpeciesAbsence': 'absence',
        'DataGenreSpeciesAbundance': 'abundance',
        'DataGenreCC': 'current',
        'DataGenreFC': 'future',
        'DataGenreE': 'environmental',
        'DataGenreTraits': 'traits',
    }
    from org.bccvl.site.interfaces import IBCCVLMetadata
    for brain in pc(BCCDataGenre=genre_map.keys()):
        obj = brain.getObject()
        md = IBCCVLMetadata(obj)
        if not md.get('categories', None):
            md['categories'] = [genre_map[brain.BCCDataGenre]]
            obj.reindexObject()
    # update temporal and year on all datasets
    from org.bccvl.site.content.interfaces import IDataset
    import re
    # BUGFIX: loop variable was misspelled 'bran' while the body read
    # 'brain' — the stale 'brain' from the previous loop was processed on
    # every iteration, so no dataset was actually migrated.
    for brain in pc(object_provides=IDataset.__identifier__):
        obj = brain.getObject()
        md = IBCCVLMetadata(obj)
        if hasattr(obj, 'rightsstatement'):
            del obj.rightsstatement
        # temporal may be an attribute or is in md
        if 'temporal' in md:
            if 'year' not in md:
                # copy temporal start to year
                sm = re.search(r'start=(.*?);', md['temporal'])
                if sm:
                    md['year'] = int(sm.group(1))
            # delete temporal
            del md['temporal']
            obj.reindexObject()
        if 'year' not in md:
            # BUGFIX: was LOG.info('MD not updated for:', brain.getPath) —
            # no %s placeholder and the bound method was never called
            LOG.info('MD not updated for: %s', brain.getPath())
    # clean up any local utilities from gu.z3cform.rdf
    count = 0
    from zope.component import getSiteManager
    sm = getSiteManager()
    from zope.schema.interfaces import IVocabularyFactory
    from gu.z3cform.rdf.interfaces import IFresnelVocabularyFactory
    for vocab in [x for x
                  in sm.getAllUtilitiesRegisteredFor(IVocabularyFactory)
                  if IFresnelVocabularyFactory.providedBy(x)]:
        sm.utilities.unsubscribe((), IVocabularyFactory, vocab)
        count += 1
    logger.info('Unregistered %d local vocabularies', count)
    # migrate OAuth configuration registry to use new interfaces
    from zope.schema import getFieldNames
    from .oauth.interfaces import IOAuth1Settings
    from .oauth.figshare import IFigshare
    registry = getUtility(IRegistry)
    # there is only Figshare there atm.
    coll = registry.collectionOfInterface(IOAuth1Settings)
    newcoll = registry.collectionOfInterface(IFigshare)
    for cid, rec in coll.items():
        # add new
        newrec = newcoll.add(cid)
        newfields = getFieldNames(IFigshare)
        # copy all attributes over
        for field in getFieldNames(IOAuth1Settings):
            if field in newfields:
                setattr(newrec, field, getattr(rec, field))
    # remove all old settings
    coll.clear()
    logger.info("Migrated OAuth1 settings to Figshare settings")
    for toolkit in portal[defaults.TOOLKITS_FOLDER_ID].values():
        if hasattr(toolkit, 'interface'):
            del toolkit.interface
        if hasattr(toolkit, 'method'):
            del toolkit.method
        toolkit.reindexObject()
    # possible way to update interface used in registry collections:
    # 1. get collectionOfInterface(I...) ... get's Collections proxy
    # 2. use proxy.add(key) ... (add internally re-registers the given interface)
    #    - do this for all entries in collections proxy
    logger.info("finished")
def item(self):
    """Return a dict describing the selected experiment and its models.

    Top-level keys: title, uuid, layers, resolution; sub-key 'subitems'
    lists the completed model results (one per result folder) with their
    algorithm and threshold information.  ``self.value`` maps an
    experiment uuid to a dict of selected model uuids.
    """
    item = {}
    if self.value:
        experiment_uuid = self.value.keys()[0]
        expbrain = uuidToCatalogBrain(experiment_uuid)
        if expbrain is None:
            # experiment has been deleted or is inaccessible
            return {
                'title': u'Not Available',
                'uuid': experiment_uuid,
                'subitems': []  # models
            }
        item['title'] = expbrain.Title
        item['uuid'] = expbrain.UID
        exp = expbrain.getObject()
        # TODO: To get layers of all subsets?
        if getattr(exp, 'datasubsets', None):
            env_datasets = exp.datasubsets[0].get('environmental_datasets')
            item['layers'] = set(chain(*env_datasets.values()))
        else:
            item['layers'] = set(chain(*exp.environmental_datasets.values()))
        expmd = IBCCVLMetadata(exp)
        item['resolution'] = expmd.get('resolution')
        # now search all models within and add infos
        pc = getToolByName(self.context, 'portal_catalog')
        # only get result folders that are completed
        brains = pc.searchResults(path={'query': expbrain.getPath(),
                                        'depth': 1},
                                  portal_type='Folder',
                                  job_state='COMPLETED')
        # TODO: maybe as generator?
        item['subitems'] = []
        for fldbrain in brains:
            # Get the SDM model from result folder
            brain = pc.searchResults(path=fldbrain.getPath(),
                                     BCCDataGenre=self.genre)
            if not brain:
                # ignore this folder, as it does not have a result we want
                continue
            brain = brain[0]
            # get algorithm term
            algoid = getattr(brain.getObject(), 'job_params',
                             {}).get('function')
            algobrain = self.algo_dict.get(algoid, None)
            # FIX: algobrain may be None for an unknown algorithm id; the
            # original dereferenced it unconditionally and raised
            # AttributeError. Skip such results like other filtered models.
            if algobrain is None:
                continue
            # Filter out geographic models
            if algobrain.getObject().algorithm_category == 'geographic':
                continue
            # FIXME: I need a different list of thresholds for display;
            #        esp. don't look up threshold, but take vales (threshold
            #        id and value) from field as is
            thresholds = dataset.getThresholds(brain.UID)[brain.UID]
            threshold = self.value[experiment_uuid].get(brain.UID)
            # is threshold in list?
            if threshold and threshold['label'] not in thresholds:
                # maybe a custom entered number?
                # ... I guess we don't really care as long as we produce
                # the same the user entered. (validate?)
                thresholds[threshold['label']] = threshold['label']
            # current projection tiff file, and its metadata
            cpbrains = pc.searchResults(path=expbrain.getPath(),
                                        BCCDataGenre=['DataGenreCP'])
            cpmd = IBCCVLMetadata(cpbrains[0].getObject())
            item['subitems'].append(
                {'item': brain,
                 'uuid': brain.UID,
                 'title': brain.Title,
                 'selected': brain.UID in self.value[experiment_uuid],
                 'algorithm': algobrain,
                 'threshold': threshold,
                 'thresholds': thresholds,
                 'layermd': cpmd['layers'].values()[0]
                 }
            )
    return item
def handle_edit(self, action): success = u"Successfully updated" partly_success = u"Some of your changes could not be applied." status = no_changes = u"No changes made." for subform in self.subforms: # With the ``extractData()`` call, validation will occur, # and errors will be stored on the widgets amongst other # places. After this we have to be extra careful not to # call (as in ``__call__``) the subform again, since # that'll remove the errors again. With the results that # no changes are applied but also no validation error is # shown. data, errors = subform.extractData() if errors: if status is no_changes: status = subform.formErrorsMessage elif status is success: status = partly_success continue # Wo have no select field in our editsubform # del data['select'] self.context.before_update(subform.content, data) changes = subform.applyChanges(data) if changes: if status is no_changes: status = success elif status is subform.formErrorsMessage: status = partly_success # If there were changes, we'll update the view widgets # again, so that they'll actually display the changes for widget in subform.widgets.values(): if widget.mode == DISPLAY_MODE: widget.update() notify(AfterWidgetUpdateEvent(widget)) # update IBCCVLMetadata['layers'] with current entered values ds = self.context.context md = IBCCVLMetadata(ds) layers = md.get('layers', {}) # map filenames to layers file_map = {} for layer in layers.values(): file_map[layer['filename']] = layer # rebuild layers dict with correct keys and datatypes layers = {} for subform in self.subforms: if subform.content['layer']: layer_id = subform.content['layer'] else: layer_id = subform.content['filename'] layer = file_map[subform.content['filename']] layer['datatype'] = subform.content['datatype'] layer['layer'] = layer_id layers[layer_id] = layer # write changes back md['layers'] = layers modified(self.context.context) self.status = status self.context.redirect()
def __createExpmetadata(self, job_params):
    """Build the experiment metadata sections in ``self.md`` and render
    them to text.

    Collects model specifications, all input datasets (occurrence /
    absence / traits, pseudo-absences, environmental and future climate
    layers, prior model outputs, SDMs and projections) and the algorithm
    configuration, then renders the assembled sections via
    ``self.__getMetadataText``.

    :param job_params: dict of job input parameters as stored on the
        result object (dataset uuids, strategy settings, etc.)
    :return: the rendered metadata text (str)
    """
    # TODO: add other R package versions dynamically

    # Experiment title / run info
    self.md['Model specifications'] = {
        'Title': self.context.title,
        'Date/time run': str(self.context.creation_date),
        'Description': self.context.description or ''
    }

    # Iterate over all input datasets and add them as entities.
    self.md['Input datasets:'] = {}
    for key in ('species_occurrence_dataset',
                'species_absence_dataset',
                'traits_dataset'):
        if key not in job_params:
            continue
        dsbrain = uuidToCatalogBrain(job_params[key])
        if not dsbrain:
            # dataset may have been deleted since the job ran
            continue
        ds = dsbrain.getObject()
        mdata = IBCCVLMetadata(ds)
        if mdata and mdata.get('rows', None):
            # include the row count in the title when known
            spmd = {'Title': "{} ({})".format(ds.title, mdata.get('rows'))}
        else:
            spmd = {'Title': ds.title}
        info = IDownloadInfo(ds)
        spmd['Download URL'] = info['url']
        # walk up to the containing collection (or the site root) to
        # inherit description / attribution if the dataset has none
        coll = ds
        while not (ISiteRoot.providedBy(coll)
                   or ICollection.providedBy(coll)):
            coll = coll.__parent__
        spmd['Description'] = ds.description or coll.description or ''
        # getattr needs a default: not every container defines
        # an 'attribution' attribute (would raise AttributeError)
        attribution = (ds.attribution
                       or getattr(coll, 'attribution', None) or '')
        if isinstance(attribution, list):
            attribution = '\n'.join([att.raw for att in attribution])
        spmd['Attribution'] = attribution
        self.md['Input datasets:'][key] = spmd

    key = 'traits_dataset_params'
    if key in job_params:
        self.md['Input datasets:'][key] = job_params.get(key, {})

    # Pseudo-absence metadata.
    key = u"pseudo_absence_dataset"
    pa_file = self.context.get('pseudo_absences.csv')
    pa_url = ""
    pa_title = ""
    if pa_file:
        pa_title = pa_file.title
        pa_url = pa_file.absolute_url()
        pa_url = '{}/@@download/{}'.format(
            pa_url, os.path.basename(pa_url))
    pamd = {
        'Title': pa_title,
        'Download URL': pa_url,
        'Pseudo-absence Strategy': job_params.get('pa_strategy', ''),
        'Pseudo-absence Ratio': str(job_params.get('pa_ratio', ''))
    }
    # strategy-specific settings ('disc' and 'sre' are exclusive)
    if job_params.get('pa_strategy', '') == 'disc':
        pamd['Minimum distance'] = str(job_params.get('pa_disk_min', ''))
        pamd['Maximum distance'] = str(job_params.get('pa_disk_max', ''))
    elif job_params.get('pa_strategy', '') == 'sre':
        pamd['Quantile'] = str(job_params.get('pa_sre_quant', ''))
    self.md['Input datasets:'][key] = pamd

    # Environmental / future climate layer datasets.
    for key in ['environmental_datasets', 'future_climate_datasets']:
        if key not in job_params:
            continue
        env_list = []
        layer_vocab = getUtility(IVocabularyFactory,
                                 'layer_source')(self.context)
        for uuid, layers in job_params[key].items():
            ds = uuidToObject(uuid)
            # inherit description / attribution from the collection
            coll = ds
            while not (ISiteRoot.providedBy(coll)
                       or ICollection.providedBy(coll)):
                coll = coll.__parent__
            description = ds.description or coll.description or ''
            attribution = (ds.attribution
                           or getattr(coll, 'attribution', None) or '')
            if isinstance(attribution, list):
                attribution = '\n'.join([att.raw for att in attribution])
            layer_titles = [
                layer_vocab.getLayerTitle(layer) for layer in layers
            ]
            env_list.append({
                'Title': ds.title,
                'Layers': u'\n'.join(layer_titles),
                'Description': description,
                'Attribution': attribution
            })
        self.md['Input datasets:'][key] = env_list

    # Model output datasets used as inputs (e.g. biodiverse input).
    key = "datasets"
    if key in job_params:
        dataset_list = []
        for uid in job_params[key]:
            dsbrain = uuidToCatalogBrain(uid)
            if not dsbrain:
                continue
            ds = dsbrain.getObject()
            # find the experiment this dataset was produced by
            source_exp = ds.__parent__
            while not IExperiment.providedBy(source_exp):
                source_exp = source_exp.__parent__
            dsmd = IBCCVLMetadata(ds)
            dataset_list.append({
                'Source experiment': source_exp.title,
                'Title': ds.title,
                'Description': ds.description,
                'Download URL': '{}/@@download/file/{}'.format(
                    ds.absolute_url(),
                    os.path.basename(ds.absolute_url())),
                'Algorithm': ds.__parent__.job_params.get('function', ''),
                'Species': dsmd.get('species', {}).get(
                    'scientificName', ''),
                'Resolution': dsmd.get('resolution', '')
            })
        self.md['Input datasets:'][key] = dataset_list

    # Input SDM (for projection experiments).
    key = 'species_distribution_models'
    if key in job_params:
        dsbrain = uuidToCatalogBrain(job_params[key])
        if dsbrain:
            ds = dsbrain.getObject()
            # find the experiment this dataset was produced by
            source_exp = ds.__parent__
            while not IExperiment.providedBy(source_exp):
                source_exp = source_exp.__parent__
            # the threshold lookup may miss -> guard against None
            threshold = self.context.species_distribution_models.get(
                source_exp.UID(), {}).get(ds.UID()) or {}
            dsmd = IBCCVLMetadata(ds)
            self.md['Input datasets:'][key] = {
                'Source experiment': source_exp.title,
                'Title': ds.title,
                'Description': ds.description,
                'Download URL': '{}/@@download/file/{}'.format(
                    ds.absolute_url(),
                    os.path.basename(ds.absolute_url())),
                'Algorithm': ds.__parent__.job_params.get('function', ''),
                'Species': dsmd.get('species', {}).get(
                    'scientificName', ''),
                'Threshold': "{}({})".format(
                    threshold.get('label', ''),
                    str(threshold.get('value', '')))
            }

    # Input projections (for biodiverse experiments).
    key = 'projections'
    if key in job_params:
        # TODO(review): each iteration overwrites
        # self.md['Input datasets:'][key], so only the last projection
        # is reported — confirm whether a list (as for environmental
        # datasets) was intended before changing the output structure.
        for pds in job_params[key]:
            threshold = pds.get('threshold', {})
            dsbrain = uuidToCatalogBrain(pds.get('dataset'))
            if not dsbrain:
                continue
            ds = dsbrain.getObject()
            # find the experiment this dataset was produced by
            source_exp = ds.__parent__
            while not IExperiment.providedBy(source_exp):
                source_exp = source_exp.__parent__
            self.md['Input datasets:'][key] = {
                'Source experiment': source_exp.title,
                'Title': ds.title,
                'Description': ds.description,
                'Download URL': '{}/@@download/file/{}'.format(
                    ds.absolute_url(),
                    os.path.basename(ds.absolute_url())),
                'Algorithm': ds.__parent__.job_params.get('function', ''),
                'Species': IBCCVLMetadata(ds).get('species', {}).get(
                    'scientificName', ''),
                'Threshold': "{}({})".format(
                    threshold.get('label', ''),
                    str(threshold.get('value', ''))),
                'Biodiverse Cell size (m)': str(
                    job_params.get('cluster_size', ''))
            }

    # Projection experiments do not have an algorithm as input.
    if not IProjectionExperiment.providedBy(self.context.__parent__):
        for key in ['function', 'algorithm']:
            if key in job_params:
                self.md['Algorithm settings:'] = {
                    'Algorithm Name': job_params[key],
                    'Configuration options': self.__algoConfigOption(
                        job_params[key], job_params)
                }

    # Render all sections in a fixed order into one text blob.
    mdtext = StringIO.StringIO()
    for heading in [
            'BCCVL model outputs guide',
            'System specifications',
            'Model specifications',
            'Input datasets:',
            'Algorithm settings:',
            'Model outputs:'
    ]:
        mdtext.write(self.__getMetadataText(heading, self.md))
    return mdtext.getvalue()