def get_ensemble_params(result):
    """Assemble worker parameters for an ensemble job from *result*."""
    # Work on a deep copy so the persisted job_params dict stays untouched.
    params = deepcopy(result.job_params)
    # 'datasets' and 'sdm_projections' hold bare dataset uuids; expand each
    # into the full dataset-info dict the worker needs.
    params['datasets'] = [getdatasetparams(ds_uuid)
                          for ds_uuid in params['datasets']]
    params['sdm_projections'] = [getdatasetparams(proj_uuid)
                                 for proj_uuid in params['sdm_projections']]
    # TODO: quick fix — celery's json encoder can't serialise Decimal,
    #       so coerce threshold values to float here.
    params['thresholds'] = [float(val) for val in params['thresholds']]
    # Tell the worker which parameters reference downloadable files.
    workerhints = {
        'files': ('datasets', 'sdm_projections',)
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_traits_params(result):
    """Build worker parameters for a species-traits job from *result*.

    Resolves the traits dataset uuid into full metadata, flattens
    environmental datasets into per-layer file descriptors, attaches the
    modelling-region download info and adds worker download hints.
    """
    params = deepcopy(result.job_params)
    # Resolve the traits dataset uuid into dataset metadata.
    for paramname in ('traits_dataset',):
        if not params.get(paramname, None):
            continue
        uuid = params[paramname]
        dsinfo = getdatasetparams(uuid)
        if dsinfo['filename'].endswith('.zip'):
            # FIXME: too many static assumptions about how an occurrence zip
            #        file looks like; layers:key does not match anything
            #        (should it?); assumes exactly one file here
            # TODO: should I remove 'layers' section here?
            # list(...) keeps this working on Python 3, where dict.values()
            # returns a non-subscriptable view.
            dsinfo['zippath'] = list(dsinfo['layers'].values())[0]['filename']
        params[paramname] = dsinfo
    # TODO: This assumes we only zip file based layers
    envlist = []
    envds = params.get('environmental_datasets') or {}
    for uuid, layers in envds.items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                # TODO: should we use layer title or URI?
                'layer': layer,
                'type': dsinfo['layers'][layer]['datatype']
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            envlist.append(dsdata)
    # replace original dict with the flattened per-layer list
    params['environmental_datasets'] = envlist
    # Get the content of the modelling_region BlobFile.
    # Note: deepcopy does not copy the content of BlobFile.
    if result.job_params['modelling_region']:
        params['modelling_region'] = {
            'uuid': IUUID(result),
            'filename': 'modelling_region.json',
            'downloadurl': '{0}/API/em/v1/constraintregion?uuid={1}'.format(
                getSite().absolute_url(), IUUID(result)),
        }
    # add hints for worker; only parameters actually present in params
    workerhints = {
        'files': [x for x in ('traits_dataset', 'environmental_datasets',
                              'modelling_region',) if x in params]
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_sdm_params(result):
    """Collect job parameters for an SDM run into a worker-ready dict."""
    # Deep copy keeps the persisted job_params dict untouched.
    params = deepcopy(result.job_params)
    # TODO: names to fix up
    #   occurrence -> species_occurrence_dataset
    #   background -> species_absence_dataset
    #   pseudoabsence['enabled'] -> species_pseudo_absence_points
    #   pseudoabsence['points'] -> species_number_pseudo_absence_points
    #   layers + environment{} -> environmental_datasets (TODO: list of files)
    # Resolve occurrence/absence uuids into dataset metadata and add
    # worker hints to download the files.
    for key in ('species_occurrence_dataset', 'species_absence_dataset'):
        # TODO: absence might be none
        ds = getdatasetparams(params[key])
        params[key] = ds
        if ds:
            # biomod replaces spaces and underscores with '.'; do the same
            # TODO: really necessary?
            ds['species'] = re.sub(u"[ _]", u".",
                                   ds.get('species', u'Unknown'))
    # Flatten environmental datasets (uuid -> [layers]) into a list of
    # per-layer file descriptors.
    # TODO: This assumes we only zip file based layers
    flattened = []
    for ds_uuid, layerlist in params['environmental_datasets'].items():
        info = getdatasetparams(ds_uuid)
        # FIXME: poor check whether this is a zip file
        is_zip = info['filename'].endswith('.zip')
        for layername in layerlist:
            entry = {
                'uuid': info['uuid'],
                'filename': info['filename'],
                'downloadurl': info['downloadurl'],
                'internalurl': info['internalurl'],
                # TODO: should we use layer title or URI?
                'layer': layername,
                'type': info['layers'][layername]['datatype']
            }
            # zip-based layers additionally need the path inside the archive
            if is_zip:
                entry['zippath'] = info['layers'][layername]['filename']
            flattened.append(entry)
    params['environmental_datasets'] = flattened
    # Hints so the worker knows which params reference downloadable files.
    workerhints = {
        'files': ('species_occurrence_dataset', 'species_absence_dataset',
                  'environmental_datasets')
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_project_params(result):
    """Build worker parameters for a climate projection job from *result*.

    Returns a dict with 'env', 'params' and 'worker' keys, where 'worker'
    hints which params reference downloadable files.
    """
    # deep copy so the persisted job_params dict is not modified
    params = deepcopy(result.job_params)
    # resolve the sdm uuid into full dataset metadata
    uuid = params['species_distribution_models']
    params['species_distribution_models'] = getdatasetparams(uuid)
    # do biomod name mangling of species name
    params['species_distribution_models']['species'] = re.sub(u"[ _'\"/\(\)\{\}\[\]]", u".", params['species_distribution_models'].get('species', u"Unknown"))
    # we need the layers from sdm to fetch correct files for climate_models
    # TODO: getdatasetparams should fetch 'layers'
    sdmobj = uuidToObject(uuid)
    sdmmd = IBCCVLMetadata(sdmobj)
    params['species_distribution_models']['layers'] = sdmmd.get('layers_used', None)
    # expand future climate datasets (uuid -> layer list) into a flat
    # list of per-layer file descriptors
    climatelist = []
    for uuid, layers in params['future_climate_datasets'].items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                'layer': layer,
                'zippath': dsinfo['layers'][layer]['filename'],
                # TODO: add year, gcm, emsc here?
                'type': dsinfo['layers'][layer]['datatype'],
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            climatelist.append(dsdata)
    # replace climate_models parameter with the flattened list
    params['future_climate_datasets'] = climatelist
    params['selected_models'] = 'all'
    # projection.name from dsinfo
    # FIXME: workaround to get future projection name back, but this works
    #        only for file naming scheme with current available data
    # NOTE(review): relies on dsinfo leaking out of the loop above —
    #        raises NameError if 'future_climate_datasets' is empty; confirm
    params['projection_name'], _ = os.path.splitext(dsinfo['filename'])
    # TODO: quick fix Decimal json encoding through celery
    #       (where is my custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add hints for worker
    workerhints = {
        'files': ('species_distribution_models', 'future_climate_datasets')
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_sdm_params(result):
    """Turn persisted SDM job params into a worker-ready parameter dict."""
    # Never mutate the persisted dict — operate on a deep copy.
    params = deepcopy(result.job_params)
    # TODO: names to fix up
    #   occurrence -> species_occurrence_dataset
    #   background -> species_absence_dataset
    #   pseudoabsence['enabled'] -> species_pseudo_absence_points
    #   pseudoabsence['points'] -> species_number_pseudo_absence_points
    #   layers + environment{} -> environmental_datasets (TODO: list of files)
    # Fetch metadata for the point datasets; worker hints below tell the
    # worker to download them.
    for name in ('species_occurrence_dataset', 'species_absence_dataset'):
        # TODO: absence might be none
        dataset = getdatasetparams(params[name])
        params[name] = dataset
        if dataset:
            # replace all spaces and underscores with '.' (biomod does the
            # same)  TODO: really necessary?
            dataset['species'] = re.sub(u"[ _'\"/\(\)\{\}\[\]]", u".",
                                        dataset.get('species', u'Unknown'))
    # TODO: This assumes we only zip file based layers
    layer_entries = []
    for env_uuid, wanted in params['environmental_datasets'].items():
        meta = getdatasetparams(env_uuid)
        for lyr in wanted:
            record = {
                'uuid': meta['uuid'],
                'filename': meta['filename'],
                'downloadurl': meta['downloadurl'],
                'internalurl': meta['internalurl'],
                # TODO: should we use layer title or URI?
                'layer': lyr,
                'type': meta['layers'][lyr]['datatype']
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if meta['filename'].endswith('.zip'):
                record['zippath'] = meta['layers'][lyr]['filename']
            layer_entries.append(record)
    # replace the original mapping with the flat list
    params['environmental_datasets'] = layer_entries
    # add hints for worker to download files
    workerhints = {
        'files': ('species_occurrence_dataset', 'species_absence_dataset',
                  'environmental_datasets')
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_traits_params(result):
    """Resolve the data_table uuid of *result* into worker parameters."""
    # Deep copy protects the persisted job_params dict.
    params = deepcopy(result.job_params)
    # Replace the bare uuid with the full dataset metadata dict.
    params["data_table"] = getdatasetparams(params["data_table"])
    # Hint the worker to download the data_table file.
    return {
        "env": {},
        "params": params,
        "worker": {"files": ("data_table",)},
    }
def get_traits_params(result):
    """Prepare worker parameters for a traits job based on *result*."""
    # Copy deeply so the stored job_params are never touched.
    params = deepcopy(result.job_params)
    # Expand the data_table uuid into its dataset metadata.
    table_uuid = params['data_table']
    params['data_table'] = getdatasetparams(table_uuid)
    # The worker has to download the data_table file.
    hints = {'files': ('data_table', )}
    return {'env': {}, 'params': params, 'worker': hints}
def get_project_params(result): params = deepcopy(result.job_params) # get metadata for species_distribution_models uuid = params['species_distribution_models'] params['species_distribution_models'] = getdatasetparams(uuid) # do biomod name mangling of species name params['species_distribution_models']['species'] = re.sub( u"[ _]", u".", params['species_distribution_models'].get('species', u"Unknown")) # we need the layers from sdm to fetch correct files for climate_models # TODO: getdatasetparams should fetch 'layers' sdmobj = uuidToObject(uuid) sdmmd = IBCCVLMetadata(sdmobj) params['species_distribution_models']['layers'] = sdmmd.get( 'layers_used', None) # do future climate layers climatelist = [] for uuid, layers in params['future_climate_datasets'].items(): dsinfo = getdatasetparams(uuid) for layer in layers: dsdata = { 'uuid': dsinfo['uuid'], 'filename': dsinfo['filename'], 'downloadurl': dsinfo['downloadurl'], 'internalurl': dsinfo['internalurl'], 'layer': layer, 'zippath': dsinfo['layers'][layer]['filename'], # TODO: add year, gcm, emsc here? 'type': dsinfo['layers'][layer]['datatype'], } # if this is a zip file we'll have to set zippath as well # FIXME: poor check whether this is a zip file if dsinfo['filename'].endswith('.zip'): dsdata['zippath'] = dsinfo['layers'][layer]['filename'] climatelist.append(dsdata) # replace climate_models parameter params['future_climate_datasets'] = climatelist params['selected_models'] = 'all' # projection.name from dsinfo # FIXME: workaround to get future projection name back, but this works only for file naming scheme with current available data params['projection_name'], _ = os.path.splitext(dsinfo['filename']) # add hints for worker workerhints = { 'files': ('species_distribution_models', 'future_climate_datasets') } return {'env': {}, 'params': params, 'worker': workerhints}
def get_traits_params(result):
    """Return env/params/worker dict for a traits job built from *result*."""
    # Work on a deep copy so we never mutate the persisted job_params.
    params = deepcopy(result.job_params)
    # Swap the data_table uuid for the full dataset metadata.
    params['data_table'] = getdatasetparams(params['data_table'])
    # Worker hint: the data_table file must be downloaded.
    workerhints = {
        'files': ('data_table', )
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_project_params(result): params = deepcopy(result.job_params) # get metadata for species_distribution_models uuid = params["species_distribution_models"] params["species_distribution_models"] = getdatasetparams(uuid) # do biomod name mangling of species name params["species_distribution_models"]["species"] = re.sub( u"[ _'\"/\(\)\{\}\[\]]", u".", params["species_distribution_models"].get("species", u"Unknown") ) # we need the layers from sdm to fetch correct files for climate_models # TODO: getdatasetparams should fetch 'layers' sdmobj = uuidToObject(uuid) sdmmd = IBCCVLMetadata(sdmobj) params["species_distribution_models"]["layers"] = sdmmd.get("layers_used", None) # do future climate layers climatelist = [] for uuid, layers in params["future_climate_datasets"].items(): dsinfo = getdatasetparams(uuid) for layer in layers: dsdata = { "uuid": dsinfo["uuid"], "filename": dsinfo["filename"], "downloadurl": dsinfo["downloadurl"], "internalurl": dsinfo["internalurl"], "layer": layer, "zippath": dsinfo["layers"][layer]["filename"], # TODO: add year, gcm, emsc here? "type": dsinfo["layers"][layer]["datatype"], } # if this is a zip file we'll have to set zippath as well # FIXME: poor check whether this is a zip file if dsinfo["filename"].endswith(".zip"): dsdata["zippath"] = dsinfo["layers"][layer]["filename"] climatelist.append(dsdata) # replace climate_models parameter params["future_climate_datasets"] = climatelist params["selected_models"] = "all" # projection.name from dsinfo # FIXME: workaround to get future projection name back, but this works only for file naming scheme with current available data params["projection_name"], _ = os.path.splitext(dsinfo["filename"]) # add hints for worker workerhints = {"files": ("species_distribution_models", "future_climate_datasets")} return {"env": {}, "params": params, "worker": workerhints}
def get_ensemble_params(result):
    """Build worker parameters for an ensemble job from *result*."""
    # Deep copy so the persisted job_params dict stays unmodified.
    params = deepcopy(result.job_params)
    # 'datasets' is a list of dataset uuids; expand each uuid into the
    # dataset-info dict the worker expects.
    params['datasets'] = [getdatasetparams(ds_uuid)
                          for ds_uuid in params['datasets']]
    # The worker must download the dataset files.
    workerhints = {'files': ('datasets', )}
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_ensemble_params(result):
    """Produce env/params/worker dict for an ensemble job."""
    # Protect the persisted job_params by deep-copying first.
    params = deepcopy(result.job_params)
    # Turn the list of dataset uuids into a list of dataset-info dicts.
    resolved = []
    for ds_uuid in params["datasets"]:
        resolved.append(getdatasetparams(ds_uuid))
    params["datasets"] = resolved
    # Hint which parameter references downloadable files.
    return {
        "env": {},
        "params": params,
        "worker": {"files": ("datasets",)},
    }
def get_biodiverse_params(result):
    """Build worker parameters for a Biodiverse job from *result*."""
    # Deep copy keeps the persisted job_params dict untouched.
    params = deepcopy(result.job_params)
    # 'projections' is a list of {'dataset': uuid, 'threshold': ...} dicts;
    # expand each into full dataset metadata carrying its threshold.
    expanded = []
    for proj in params['projections']:
        info = getdatasetparams(proj['dataset'])
        info['threshold'] = proj['threshold']
        expanded.append(info)
    params['projections'] = expanded
    # The worker must download the projection files.
    workerhints = {'files': ('projections', )}
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_ensemble_params(result):
    """Return worker parameters for an ensemble job built from *result*."""
    # Operate on a deep copy; never mutate the persisted dict.
    params = deepcopy(result.job_params)
    # Each entry in 'datasets' is a uuid — replace it with dataset info.
    params['datasets'] = [getdatasetparams(item)
                          for item in params['datasets']]
    # Worker hint: dataset files need downloading.
    workerhints = {
        'files': ('datasets', )
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_biodiverse_params(result):
    """Assemble worker parameters for a Biodiverse job."""
    # Copy deeply so the stored job_params stay pristine.
    params = deepcopy(result.job_params)
    # Each projection entry carries a dataset uuid plus a threshold;
    # resolve the uuid and keep the threshold on the resolved record.
    resolved = []
    for entry in params['projections']:
        record = getdatasetparams(entry['dataset'])
        record['threshold'] = entry['threshold']
        resolved.append(record)
    params['projections'] = resolved
    # Tell the worker to download the projection files.
    workerhints = {
        'files': ('projections', )
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_biodiverse_params(result):
    """Build worker parameters for a Biodiverse job, float-coercing Decimals."""
    # Deep copy so the persisted job_params dict is never modified.
    params = deepcopy(result.job_params)
    # 'projections' holds dicts with 'dataset' (uuid) and 'threshold';
    # expand each into full dataset metadata carrying its threshold.
    expanded = []
    for proj in params['projections']:
        info = getdatasetparams(proj['dataset'])
        info['threshold'] = proj['threshold']
        expanded.append(info)
    params['projections'] = expanded
    # TODO: quick fix for Decimal json encoding through celery (where is my
    # custom json encoder gone?).  The problem is oslo jsonutils, which
    # patches anyjson with its own loads/dumps methods; we would normally
    # use simplejson, which supports Decimal, but oslo patches it in a way
    # so that Decimal no longer works.  Coerce to float instead.
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # Patch the threshold values as well.
    for proj in params['projections']:
        thresholds = proj['threshold']
        for tkey, tval in thresholds.items():
            if isinstance(tval, Decimal):
                thresholds[tkey] = float(tval)
    # The worker must download the projection files.
    workerhints = {
        'files': ('projections', )
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_sdm_params(result):
    """Build worker parameters for an SDM job from *result*.

    Resolves occurrence/absence dataset uuids, mangles species names the
    way biomod does, flattens environmental datasets into per-layer file
    descriptors and adds worker download hints.
    """
    # make a deep copy of the params to not accidentally modify the
    # persisted dict
    params = deepcopy(result.job_params)
    # TODO: names to fix up
    #   occurrence -> species_occurrence_dataset
    #   background -> species_absence_dataset
    #   pseudoabsence['enabled'] -> species_pseudo_absence_points
    #   pseudoabsence['points'] -> species_number_pseudo_absence_points
    #   layers + environment{} -> environmental_datasets (TODO: list of files)
    # get all necessary metadata for files, and add worker hints to
    # download files
    for paramname in ('species_occurrence_dataset', 'species_absence_dataset'):
        # Skip empty or non existing params
        if not params.get(paramname, None):
            continue
        uuid = params[paramname]
        dsinfo = getdatasetparams(uuid)
        if dsinfo['filename'].endswith('.zip'):
            # FIXME: too many static assumptions about how an occurrence zip
            #        file looks like; layers:key does not match anything
            #        (should it?); assumes exactly one file here
            # TODO: should I remove 'layers' section here?
            # list(...) keeps this working on Python 3, where dict.values()
            # returns a non-subscriptable view.
            dsinfo['zippath'] = list(dsinfo['layers'].values())[0]['filename']
        params[paramname] = dsinfo
        # replace all spaces and underscores to '.' (biomod does the same)
        # TODO: really necessary?
        if params[paramname]:
            params[paramname]['species'] = re.sub(
                u"[ _,'\"/\(\)\{\}\[\]]", u".",
                params[paramname].get('species', u'Unknown'))
    # TODO: This assumes we only zip file based layers
    envlist = []
    for uuid, layers in params['environmental_datasets'].items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                # TODO: should we use layer title or URI?
                'layer': layer,
                'type': dsinfo['layers'][layer]['datatype']
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            envlist.append(dsdata)
    # replace original dict with the flattened list
    params['environmental_datasets'] = envlist
    # TODO: quick fix Decimal json encoding through celery (where is my
    #       custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # add hints for worker to download files
    workerhints = {
        # only those parameters that are actually in params dict
        'files': [x for x in ('species_occurrence_dataset',
                              'species_absence_dataset',
                              'environmental_datasets') if x in params]
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def get_traits_params(result):
    """Build worker parameters for a species-traits job from *result*."""
    params = deepcopy(result.job_params)
    # resolve the traits dataset uuid into dataset metadata
    for paramname in ('traits_dataset', ):
        if not params.get(paramname, None):
            continue
        uuid = params[paramname]
        dsinfo = getdatasetparams(uuid)
        if dsinfo['filename'].endswith('.zip'):
            # FIXME: too many static assumptions about how an occurrence zip
            #        file looks like; layers:key does not match anything
            #        (should it?); assumes exactly one file here
            # TODO: should I remove 'layers' section here?
            # list(...) keeps this working on Python 3, where dict.values()
            # returns a non-subscriptable view.
            dsinfo['zippath'] = list(dsinfo['layers'].values())[0]['filename']
        params[paramname] = dsinfo
    # TODO: This assumes we only zip file based layers
    envlist = []
    envds = params.get('environmental_datasets') or {}
    for uuid, layers in envds.items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                # TODO: should we use layer title or URI?
                'layer': layer,
                'type': dsinfo['layers'][layer]['datatype']
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            envlist.append(dsdata)
    # replace original dict
    params['environmental_datasets'] = envlist
    # Get the content of the modelling_region BlobFile.
    # Note: deepcopy does not copy the content of BlobFile.
    if result.job_params['modelling_region']:
        params['modelling_region'] = {
            'uuid': IUUID(result),
            'filename': 'modelling_region.json',
            'downloadurl': '{0}/API/em/v1/constraintregion?uuid={1}'.format(
                getSite().absolute_url(), IUUID(result)),
        }
    # add hints for worker; only parameters actually present
    workerhints = {
        'files': [
            x for x in (
                'traits_dataset',
                'environmental_datasets',
                'modelling_region',
            ) if x in params
        ]
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def __generateParameters(self, params, portal_type):
    """Format experiment input parameters and return them as a JSON string.

    This code is a mirror "copy" of get_sdm_params, get_project_params,
    get_biodiverse_params, get_traits_params, get_ensemble_params in
    org.bccvl.compute.
    """
    inp = deepcopy(params)
    for key, val in inp.items():
        if key in ('modelling_region', 'projection_region'):
            if val:
                val = params[key].data
            else:
                val = '{0}/API/em/v1/constraintregion?uuid={1}'.format(
                    getSite().absolute_url(), IUUID(self.context))
        if key in ('species_occurrence_dataset', 'species_absence_dataset'):
            if val:
                val = getdatasetparams(val)
                # biomod-style species name mangling
                val['species'] = re.sub(u"[ _,\-'\"/\(\)\{\}\[\]]", u".",
                                        val.get('species', u'Unknown'))
        if key in ('environmental_datasets', 'future_climate_datasets'):
            # flatten uuid -> [layers] mapping into per-layer descriptors
            envlist = []
            for uuid, layers in val.items():
                dsinfo = getdatasetparams(uuid)
                for layer in layers:
                    dsdata = {
                        'uuid': dsinfo['uuid'],
                        'filename': dsinfo['filename'],
                        'downloadurl': dsinfo['downloadurl'],
                        # TODO: should we use layer title or URI?
                        'layer': layer,
                        'type': dsinfo['layers'][layer]['datatype']
                    }
                    # if this is a zip file we'll have to set zippath as well
                    # FIXME: poor check whether this is a zip file
                    if dsinfo['filename'].endswith('.zip'):
                        dsdata['zippath'] = dsinfo['layers'][layer]['filename']
                    envlist.append(dsdata)
            val = envlist
        # for SDM model as input to Climate Change experiment
        if key == 'species_distribution_models':
            if val:
                uuid = val
                val = getdatasetparams(uuid)
                val['species'] = re.sub(u"[ _\-'\"/\(\)\{\}\[\]]", u".",
                                        val.get('species', u"Unknown"))
                sdmobj = uuidToObject(uuid)
                sdmmd = IBCCVLMetadata(sdmobj)
                val['layers'] = sdmmd.get('layers_used', None)
                # do SDM projection results
                sdm_projections = []
                for resuuid in inp['sdm_projections']:
                    sdm_projections.append(getdatasetparams(resuuid))
                inp['sdm_projections'] = sdm_projections
        # for projection as input to Biodiverse experiment
        if key == 'projections':
            dslist = []
            for dsparam in val:
                dsinfo = getdatasetparams(dsparam['dataset'])
                dsinfo['threshold'] = dsparam['threshold']
                # Convert threshold value from Decimal to float
                for thkey, thvalue in dsinfo['threshold'].items():
                    if isinstance(thvalue, Decimal):
                        dsinfo['threshold'][thkey] = float(thvalue)
                dslist.append(dsinfo)
            # replace projections param
            val = dslist
        # projection models as input to Ensemble experiment
        if key == 'datasets':
            dslist = []
            for uuid in val:
                dslist.append(getdatasetparams(uuid))
            # replace datasets param
            val = dslist
        # for trait dataset as input to Species Trait Modelling experiment
        if key == 'traits_dataset':
            dsinfo = getdatasetparams(val)
            if dsinfo['filename'].endswith('.zip'):
                # list(...) keeps this working on Python 3 dict views
                dsinfo['zippath'] = list(
                    dsinfo['layers'].values())[0]['filename']
            val = dsinfo
        if isinstance(val, Decimal):
            val = float(val)
        inp[key] = val
    # BUG FIX: original compared the portal_type string to a tuple with
    # '==', which is always False; a membership test was clearly intended.
    if portal_type in ('org.bccvl.content.sdmexperiment',
                       'org.bccvl.content.msdmexperiment',
                       'org.bccvl.content.mmexperiment'):
        inp.update({
            'rescale_all_models': False,
            'selected_models': 'all',
            'modeling_id': 'bccvl',
            # generic dismo params
            'tails': 'both',
        })
    elif portal_type == 'org.bccvl.content.projectionexperiment':
        # NOTE(review): dsinfo leaks out of the loops above — undefined if
        # no dataset-bearing key was processed; confirm callers prevent that
        inp.update({
            'selected_models': 'all',
            'projection_name': os.path.splitext(dsinfo['filename'])[0]
        })
    inputParams = {
        # example of input/ouput directories
        'env': {
            'inputdir': './input',
            'outputdir': './output',
            'scriptdir': './script',
            'workdir': './workdir'
        },
        'params': inp
    }
    return json.dumps(inputParams, default=str, indent=4)
def get_project_params(result):
    """Build worker parameters for a climate projection job from *result*."""
    # deep copy so the persisted job_params dict is not modified
    params = deepcopy(result.job_params)
    # resolve the sdm uuid into full dataset metadata
    uuid = params['species_distribution_models']
    params['species_distribution_models'] = getdatasetparams(uuid)
    # do biomod name mangling of species name
    params['species_distribution_models']['species'] = re.sub(u"[ _\-'\"/\(\)\{\}\[\]]", u".", params['species_distribution_models'].get('species', u"Unknown"))
    # we need the layers from sdm to fetch correct files for climate_models
    # TODO: getdatasetparams should fetch 'layers'
    sdmobj = uuidToObject(uuid)
    sdmmd = IBCCVLMetadata(sdmobj)
    params['species_distribution_models']['layers'] = sdmmd.get('layers_used', None)
    # resolve SDM projection result uuids into dataset metadata
    sdm_projections = []
    for resuuid in params['sdm_projections']:
        sdm_projections.append(getdatasetparams(resuuid))
    params['sdm_projections'] = sdm_projections
    # flatten future climate datasets (uuid -> layer list) into per-layer
    # file descriptors
    climatelist = []
    for uuid, layers in params['future_climate_datasets'].items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                'layer': layer,
                # TODO: add year, gcm, emsc here?
                'type': dsinfo['layers'][layer]['datatype'],
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            # FIXME: workaround to get future projection name back, but this
            #        works only for file naming scheme with current
            #        available data
            if params['selected_future_layers'] and layer in params['selected_future_layers']:
                params['projection_name'], _ = os.path.splitext(dsinfo['filename'])
            climatelist.append(dsdata)
    # replace climate_models parameter
    params['future_climate_datasets'] = climatelist
    params['selected_models'] = 'all'
    # In case no future climate layer is selected, fall back to the last
    # dsinfo seen in the loop above
    # NOTE(review): raises NameError if 'future_climate_datasets' is empty —
    #        confirm callers prevent that
    if not params.get('projection_name'):
        params['projection_name'], _ = os.path.splitext(dsinfo['filename'])
    # TODO: quick fix Decimal json encoding through celery (where is my
    #       custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # Get the content of the projection_region BlobFile.
    # Note: deepcopy does not copy the content of BlobFile.
    params['projection_region'] = {
        'uuid': IUUID(result),
        'filename': 'projection_region.json',
        'downloadurl': '{0}/API/em/v1/constraintregion?uuid={1}'.format(getSite().absolute_url(), IUUID(result)),
    }
    # add hints for worker
    workerhints = {
        'files': ('species_distribution_models', 'future_climate_datasets', 'sdm_projections', 'projection_region',)
    }
    return {'env': {}, 'params': params, 'worker': workerhints}
def __generateParameters(self, params, portal_type):
    """Format experiment input parameters and return them as a JSON string.

    This code is a mirror "copy" of get_sdm_params, get_project_params,
    get_biodiverse_params, get_traits_params, get_ensemble_params in
    org.bccvl.compute.
    """
    inp = deepcopy(params)
    for key, val in inp.items():
        if key in ('modelling_region', 'projection_region'):
            if val:
                val = params[key].data
            else:
                val = '{0}/API/em/v1/constraintregion?uuid={1}'.format(
                    getSite().absolute_url(), IUUID(self.context))
        if key in ('species_occurrence_dataset', 'species_absence_dataset'):
            if val:
                val = getdatasetparams(val)
                # biomod-style species name mangling
                val['species'] = re.sub(u"[ _,\-'\"/\(\)\{\}\[\]]", u".",
                                        val.get('species', u'Unknown'))
        if key in ('environmental_datasets', 'future_climate_datasets'):
            # flatten uuid -> [layers] mapping into per-layer descriptors
            envlist = []
            for uuid, layers in val.items():
                dsinfo = getdatasetparams(uuid)
                for layer in layers:
                    dsdata = {
                        'uuid': dsinfo['uuid'],
                        'filename': dsinfo['filename'],
                        'downloadurl': dsinfo['downloadurl'],
                        # TODO: should we use layer title or URI?
                        'layer': layer,
                        'type': dsinfo['layers'][layer]['datatype']
                    }
                    # if this is a zip file we'll have to set zippath as well
                    # FIXME: poor check whether this is a zip file
                    if dsinfo['filename'].endswith('.zip'):
                        dsdata['zippath'] = dsinfo['layers'][layer]['filename']
                    envlist.append(dsdata)
            val = envlist
        # for SDM model as input to Climate Change experiment
        if key == 'species_distribution_models':
            if val:
                uuid = val
                val = getdatasetparams(uuid)
                val['species'] = re.sub(u"[ _\-'\"/\(\)\{\}\[\]]", u".",
                                        val.get('species', u"Unknown"))
                sdmobj = uuidToObject(uuid)
                sdmmd = IBCCVLMetadata(sdmobj)
                val['layers'] = sdmmd.get('layers_used', None)
                # do SDM projection results
                sdm_projections = []
                for resuuid in inp['sdm_projections']:
                    sdm_projections.append(getdatasetparams(resuuid))
                inp['sdm_projections'] = sdm_projections
        # for projection as input to Biodiverse experiment
        if key == 'projections':
            dslist = []
            for dsparam in val:
                dsinfo = getdatasetparams(dsparam['dataset'])
                dsinfo['threshold'] = dsparam['threshold']
                # Convert threshold value from Decimal to float
                for thkey, thvalue in dsinfo['threshold'].items():
                    if isinstance(thvalue, Decimal):
                        dsinfo['threshold'][thkey] = float(thvalue)
                dslist.append(dsinfo)
            # replace projections param
            val = dslist
        # projection models as input to Ensemble experiment
        if key == 'datasets':
            dslist = []
            for uuid in val:
                dslist.append(getdatasetparams(uuid))
            # replace datasets param
            val = dslist
        # for trait dataset as input to Species Trait Modelling experiment
        if key == 'traits_dataset':
            dsinfo = getdatasetparams(val)
            if dsinfo['filename'].endswith('.zip'):
                # list(...) keeps this working on Python 3 dict views
                dsinfo['zippath'] = list(
                    dsinfo['layers'].values())[0]['filename']
            val = dsinfo
        if isinstance(val, Decimal):
            val = float(val)
        inp[key] = val
    # BUG FIX: original compared the portal_type string to a tuple with
    # '==', which is always False; a membership test was clearly intended.
    if portal_type in ('org.bccvl.content.sdmexperiment',
                       'org.bccvl.content.msdmexperiment',
                       'org.bccvl.content.mmexperiment'):
        inp.update({
            'rescale_all_models': False,
            'selected_models': 'all',
            'modeling_id': 'bccvl',
            # generic dismo params
            'tails': 'both',
        })
    elif portal_type == 'org.bccvl.content.projectionexperiment':
        # NOTE(review): dsinfo leaks out of the loops above — undefined if
        # no dataset-bearing key was processed; confirm callers prevent that
        inp.update({
            'selected_models': 'all',
            'projection_name': os.path.splitext(dsinfo['filename'])[0]
        })
    inputParams = {
        # example of input/ouput directories
        'env': {
            'inputdir': './input',
            'outputdir': './output',
            'scriptdir': './script',
            'workdir': './workdir'
        },
        'params': inp
    }
    return json.dumps(inputParams, default=str, indent=4)
def get_sdm_params(result):
    """Build worker parameters for an SDM job from *result*.

    Resolves occurrence/absence dataset uuids, flattens environmental
    datasets into per-layer file descriptors, attaches the modelling
    region download info and adds worker download hints.
    """
    # make a deep copy of the params to not accidentally modify the
    # persisted dict
    params = deepcopy(result.job_params)
    # TODO: names to fix up
    #   occurrence -> species_occurrence_dataset
    #   background -> species_absence_dataset
    #   pseudoabsence['enabled'] -> species_pseudo_absence_points
    #   pseudoabsence['points'] -> species_number_pseudo_absence_points
    #   layers + environment{} -> environmental_datasets (TODO: list of files)
    # get all necessary metadata for files, and add worker hints to
    # download files
    for paramname in ('species_occurrence_dataset', 'species_absence_dataset'):
        # Skip empty or non existing params
        if not params.get(paramname, None):
            continue
        uuid = params[paramname]
        dsinfo = getdatasetparams(uuid)
        if dsinfo['filename'].endswith('.zip'):
            # FIXME: too many static assumptions about how an occurrence zip
            #        file looks like; layers:key does not match anything
            #        (should it?); assumes exactly one file here
            # TODO: should I remove 'layers' section here?
            # list(...) keeps this working on Python 3, where dict.values()
            # returns a non-subscriptable view.
            dsinfo['zippath'] = list(dsinfo['layers'].values())[0]['filename']
        params[paramname] = dsinfo
        # replace all spaces and underscores to '.' (biomod does the same)
        # TODO: really necessary?
        if params[paramname]:
            params[paramname]['species'] = re.sub(
                u"[ _\-,'\"/\(\)\{\}\[\]]", u".",
                params[paramname].get('species', u'Unknown'))
    # TODO: This assumes we only zip file based layers
    envlist = []
    for uuid, layers in params['environmental_datasets'].items():
        dsinfo = getdatasetparams(uuid)
        for layer in layers:
            dsdata = {
                'uuid': dsinfo['uuid'],
                'filename': dsinfo['filename'],
                'downloadurl': dsinfo['downloadurl'],
                # TODO: should we use layer title or URI?
                'layer': layer,
                'type': dsinfo['layers'][layer]['datatype']
            }
            # if this is a zip file we'll have to set zippath as well
            # FIXME: poor check whether this is a zip file
            if dsinfo['filename'].endswith('.zip'):
                dsdata['zippath'] = dsinfo['layers'][layer]['filename']
            envlist.append(dsdata)
    # replace original dict with the flattened list
    params['environmental_datasets'] = envlist
    # TODO: quick fix Decimal json encoding through celery (where is my
    #       custom json encoder gone?)
    for key, item in params.items():
        if isinstance(item, Decimal):
            params[key] = float(item)
    # Pass the url to get modelling region as input file
    if result.job_params['modelling_region']:
        params['modelling_region'] = {
            'uuid': IUUID(result),
            'filename': 'modelling_region.json',
            'downloadurl': '{0}/API/em/v1/constraintregion?uuid={1}'.format(
                getSite().absolute_url(), IUUID(result)),
        }
    # add hints for worker to download files
    workerhints = {
        # only those parameters that are actually in params dict
        'files': [
            x for x in (
                'species_occurrence_dataset',
                'species_absence_dataset',
                'environmental_datasets',
                'modelling_region',
            ) if x in params
        ]
    }
    return {'env': {}, 'params': params, 'worker': workerhints}