def test_run_experiment_twice(self, mock_run_script):
    """Exercise re-running an experiment and check job-state transitions.

    Relies on fixtures set up elsewhere in the test case:
    ``self.form`` (test helper around the experiment add form),
    ``self.experiments['my-experiment']`` (pre-created experiment) and
    the patched-in ``mock_run_script`` background runner.
    """
    # create experiment: simulate pressing the 'Create and start' button
    form = self.form.get_form()
    form.request.form.update({
        'form.buttons.save': 'Create and start',
    })
    # update form with updated request
    form.update()
    # start experiment; creation should have left the job QUEUED
    jt = IExperimentJobTracker(self.experiments['my-experiment'])
    self.assertEqual(jt.state, u'QUEUED')
    # starting an already queued job is expected to report an error
    state = jt.start_job(form.request)
    self.assertEqual(state[0], 'error')
    # setup mock_run_script so the queued job can run on commit
    mock_run_script.side_effect = self.form.mock_run_script
    # finish current job: commit triggers the (mocked) job execution
    transaction.commit()
    self.assertEqual(jt.state, u'COMPLETED')
    # TODO: after commit tasks cause site to disappear and the
    # following code will fail, because without site we can't find
    # a catalog without which we can't find the toolkit by uuid
    # second run: restart goes PARTIAL, then COMPLETED after commit
    jt.start_job(form.request)
    self.assertEqual(jt.state, u'PARTIAL')
    transaction.commit()
    self.assertEqual(jt.state, u'COMPLETED')
def pullOccurrenceFromALA(self, lsid, taxon, dataSrc='ala', common=None):
    """Create a species-occurrence dataset and queue its import job.

    The dataset is placed in the species sub-folder matching *dataSrc*
    ('ala', 'gbif' or 'aekos'); any other source raises BadRequest.
    Returns the ``(status, message)`` tuple from the job submission.
    """
    # TODO: check permisions?
    # 1. create new dataset with taxon, lsid and common name set
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    if dataSrc not in ('ala', 'gbif', 'aekos'):
        raise BadRequest('Invalid data source {0}'.format(dataSrc))
    species_folder = portal[defaults.DATASETS_FOLDER_ID][
        defaults.DATASETS_SPECIES_FOLDER_ID]
    dscontainer = species_folder[dataSrc]
    # display title: "<taxon> (<common name>)" when a common name is given
    title_parts = [taxon]
    if common:
        title_parts.append(u"({})".format(common))
    display_title = u' '.join(title_parts)
    # TODO: move content creation into IALAJobTracker?
    # use a remote dataset type when swift object storage is configured
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    portal_type = ('org.bccvl.content.remotedataset'
                   if swiftsettings.storage_url
                   else 'org.bccvl.content.dataset')
    # TODO: make sure we get a better content id that dataset-x
    ds = createContentInContainer(dscontainer, portal_type,
                                  title=display_title)
    ds.dataSource = dataSrc  # Either ALA or GBIF as source
    # TODO: add number of occurences to description
    ds.description = (display_title + u' imported from '
                      + unicode(dataSrc.upper()))
    # attach BCCVL metadata describing the species
    md = IBCCVLMetadata(ds)
    # TODO: provenance ... import url?
    # FIXME: verify input parameters before adding to graph
    md['genre'] = 'DataGenreSpeciesOccurrence'
    md['species'] = {
        'scientificName': taxon,
        'taxonID': lsid,
    }
    if common:
        md['species']['vernacularName'] = common
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # 2. create and push alaimport job for dataset
    # TODO: make this named adapter
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # Job submission state notifier
    IStatusMessage(self.request).add(message, type=status)
    return (status, message)
def __call__(self):
    """Start the experiment job for the current context, then redirect
    back to the context's URL.

    Any status returned by the job tracker is surfaced to the user via
    a portal status message.
    """
    # TODO: could also just submit current context (the experiment)
    # with all infos accessible from it
    # TODO: if submitted as admin, submit experiment as owner
    # also add option to restart single restart within experiment
    tracker = IExperimentJobTracker(self.context)
    status, message = tracker.start_job(self.request)
    if status is not None:
        messages = IStatusMessage(self.request)
        messages.add(message, type=status)
    self.request.response.redirect(self.context.absolute_url())
def handleAdd(self, action):
    """Form 'Add' handler: create the experiment and auto-start its job.

    Validates the submitted data first; on success marks the add form
    finished, notifies the user and kicks off the experiment job.
    """
    data, errors = self.extractData()
    self.validateAction(data)
    if errors:
        # validation failed — surface the standard form error message
        self.status = self.formErrorsMessage
        return
    # TODO: this is prob. a bug in base form, because createAndAdd
    # does not return the wrapped object.
    new_obj = self.createAndAdd(data)
    if new_obj is None:
        # TODO: this is probably an error here?
        # object creation/add failed for some reason
        return
    # re-fetch to get the acquisition-wrapped instance (see above)
    new_obj = self.context[new_obj.id]
    # mark only as finished if we get the new object
    self._finishedAdd = True
    messages = IStatusMessage(self.request)
    messages.addStatusMessage(_(u"Item created"), "info")
    # auto start job here
    tracker = IExperimentJobTracker(new_obj)
    status, message = tracker.start_job(self.request)
    if status is not None:
        messages.add(message, type=status)
def submitsdm(self):
    """POST API endpoint: create and start an SDM experiment.

    Reads experiment properties from the request form, records any
    validation errors via ``self.record_error`` and aborts with
    BadRequest if there were any, then creates the experiment content
    object, starts its job and returns a dict with the experiment
    url/uuid, job ids and an optional submission message.
    """
    # TODO: catch UNAuthorized correctly and return json error
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # make sure we have the right context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # experiments location
        context = self.context.restrictedTraverse(
            defaults.EXPERIMENTS_FOLDER_ID)
    else:
        # custom context.... let's use in
        context = self.context
    # parse request body
    params = self.request.form
    # validate input
    # TODO: should validate type as well..... (e.g. string has to be
    # string)
    # TODO: validate dataset and layer id's existence if possible
    props = {}
    if not params.get('title', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter title',
                          {'parameter': 'title'})
    else:
        props['title'] = params['title']
    props['description'] = params.get('description', '')
    if not params.get('occurrence_data', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter occurrence_data',
                          {'parameter': 'occurrence_data'})
    else:
        # FIXME: should properly support source / id
        # for now only bccvl source is supported
        props['species_occurrence_dataset'] = params[
            'occurrence_data']['id']
    # FIXME: should properly support source/id; for now only bccvl source is
    # supported
    props['species_absence_dataset'] = params.get(
        'absence_data', {}).get('id', None)
    props['scale_down'] = params.get('scale_down', False)
    if not params.get('environmental_data', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter environmental_data',
                          {'parameter': 'environmental_data'})
    else:
        props['environmental_datasets'] = params['environmental_data']
    # optional modelling region: stored as a JSON blob when supplied
    if params.get('modelling_region', ''):
        props['modelling_region'] = NamedBlobFile(
            data=json.dumps(params['modelling_region']))
    else:
        props['modelling_region'] = None
    if not params.get('algorithms', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter algorithms',
                          {'parameter': 'algorithms'})
    else:
        portal = ploneapi.portal.get()
        props['functions'] = {}
        # FIXME: make sure we get the default values from our func object
        # resolve each algorithm id to its toolkit object and merge the
        # caller-supplied parameters over the toolkit schema defaults
        for algo, algo_params in params['algorithms'].items():
            if algo_params is None:
                algo_params = {}
            toolkit = portal[defaults.FUNCTIONS_FOLDER_ID][algo]
            toolkit_model = loadString(toolkit.schema)
            toolkit_schema = toolkit_model.schema
            func_props = {}
            for field_name in toolkit_schema.names():
                field = toolkit_schema.get(field_name)
                value = algo_params.get(field_name, field.missing_value)
                if value == field.missing_value:
                    # not supplied — fall back to the schema default
                    func_props[field_name] = field.default
                else:
                    func_props[field_name] = value
            props['functions'][IUUID(toolkit)] = func_props
    if self.errors:
        raise BadRequest("Validation Failed")
    # create experiment with data as form would do
    # TODO: make sure self.context is 'experiments' folder?
    from plone.dexterity.utils import createContent, addContentToContainer
    experiment = createContent("org.bccvl.content.sdmexperiment", **props)
    experiment = addContentToContainer(context, experiment)
    # TODO: check if props and algo params have been applied properly
    experiment.parameters = dict(props['functions'])
    # FIXME: need to get resolution from somewhere
    IBCCVLMetadata(experiment)['resolution'] = 'Resolution30m'
    # submit newly created experiment
    # TODO: handle background job submit .... at this stage we wouldn't
    # know the model run job ids
    # TODO: handle submit errors and other errors that may happen above?
    # generic exceptions could be handled in returnwrapper
    retval = {
        'experiment': {
            'url': experiment.absolute_url(),
            'uuid': IUUID(experiment)
        },
        'jobs': [],
    }
    jt = IExperimentJobTracker(experiment)
    msgtype, msg = jt.start_job(self.request)
    if msgtype is not None:
        retval['message'] = {
            'type': msgtype,
            'message': msg
        }
    # collect the job id of every result object created by the submit
    for result in experiment.values():
        jt = IJobTracker(result)
        retval['jobs'].append(jt.get_job().id)
    return retval
def submitcc(self):
    """POST API endpoint: create and start a climate-change projection
    experiment.

    Validates the request form (title, species_distribution_models,
    future_climate_datasets, optional projection_region), records
    validation errors and aborts with BadRequest on failure, then
    creates the projection experiment, starts its job and returns a
    dict with the experiment url/uuid and job ids.
    """
    # TODO: catch UNAuthorized correctly and return json error
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # make sure we have the right context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # experiments location
        context = self.context.restrictedTraverse(
            defaults.EXPERIMENTS_FOLDER_ID)
    else:
        # custom context.... let's use in
        context = self.context
    # parse request body
    params = self.request.form
    # validate input
    # TODO: should validate type as well..... (e.g. string has to be
    # string)
    # TODO: validate dataset and layer id's existence if possible
    props = {}
    if not params.get('title', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter title',
                          {'parameter': 'title'})
    else:
        props['title'] = params['title']
    props['description'] = params.get('description', '')
    if not params.get('species_distribution_models', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter species_distribution_models',
                          {'parameter': 'species_distribution_models'})
    else:
        props['species_distribution_models'] = params[
            'species_distribution_models']
    if not params.get('future_climate_datasets', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter future_climate_datasets',
                          {'parameter': 'future_climate_datasets'})
    else:
        props['future_climate_datasets'] = params[
            'future_climate_datasets']
    # optional projection region: stored as a JSON blob when supplied
    if params.get('projection_region', ''):
        props['projection_region'] = NamedBlobFile(
            data=json.dumps(params['projection_region']))
    else:
        props['projection_region'] = None
    if self.errors:
        raise BadRequest("Validation Failed")
    # create experiment with data as form would do
    # TODO: make sure self.context is 'experiments' folder?
    from plone.dexterity.utils import createContent, addContentToContainer
    experiment = createContent("org.bccvl.content.projectionexperiment",
                               **props)
    experiment = addContentToContainer(context, experiment)
    # FIXME: need to get resolution from somewhere
    IBCCVLMetadata(experiment)['resolution'] = 'Resolution30m'
    # submit newly created experiment
    # TODO: handle background job submit .... at this stage we wouldn't
    # know the model run job ids
    # TODO: handle submit errors and other errors that may happen above?
    # generic exceptions could be handled in returnwrapper
    retval = {
        'experiment': {
            'url': experiment.absolute_url(),
            'uuid': IUUID(experiment)
        },
        'jobs': [],
    }
    jt = IExperimentJobTracker(experiment)
    msgtype, msg = jt.start_job(self.request)
    if msgtype is not None:
        retval['message'] = {
            'type': msgtype,
            'message': msg
        }
    # collect the job id of every result object created by the submit
    for result in experiment.values():
        jt = IJobTracker(result)
        retval['jobs'].append(jt.get_job().id)
    return retval
def submittraits(self):
    """POST API endpoint: create and start a species-traits experiment.

    Validates the request form (species_list, title, traits_data,
    columns, optional environmental data / modelling region and the
    algorithms mapping), records validation errors and aborts with
    BadRequest on failure, then creates the experiment, starts its job
    and returns a dict with the experiment url/uuid and job ids.

    Fixes over the previous revision: the 'Iinvalid algorithms selected'
    error message typo, and the misleading local names funcs_env /
    funcs_species (they actually hold the species-source and diff-source
    algorithm vocabularies).
    """
    # TODO: catch UNAuthorized correctly and return json error
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # make sure we have the right context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # experiments location
        context = self.context.restrictedTraverse(
            defaults.EXPERIMENTS_FOLDER_ID)
    else:
        # custom context.... let's use in
        context = self.context
    # parse request body
    params = self.request.form
    # validate input
    # TODO: should validate type as well..... (e.g. string has to be
    # string)
    # TODO: validate dataset and layer id's existence if possible
    props = {}
    if params.get('species_list', None):
        props['species_list'] = params['species_list']
    else:
        # NOTE: error message uses 'speciesList' while the form key is
        # 'species_list' — kept for API compatibility
        self.record_error('Bad Request', 400,
                          'Missing parameter speciesList',
                          {'parameter': 'speciesList'})
    if not params.get('title', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter title',
                          {'parameter': 'title'})
    else:
        props['title'] = params['title']
    props['description'] = params.get('description', '')
    if not params.get('traits_data', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter traits_data',
                          {'parameter': 'traits_data'})
    else:
        # FIXME: should properly support source / id
        # for now only bccvl source is supported
        props['species_traits_dataset'] = params[
            'traits_data']['id']
    # keep only column mappings with a recognised role
    props['species_traits_dataset_params'] = {}
    for col_name, col_val in params.get("columns", {}).items():
        if col_val not in ('lat', 'lon', 'species', 'trait_con',
                           'trait_ord', 'trait_nom', 'env_var_con',
                           'env_var_cat', 'random_con', 'random_cat'):
            continue
        props['species_traits_dataset_params'][col_name] = col_val
    if not props['species_traits_dataset_params']:
        self.record_error('Bad Request', 400,
                          'Invalid values for columns',
                          {'parameter': 'columns'})
    # Check for species-level trait data i.e. species is not specified
    if 'species' not in props['species_traits_dataset_params'].values():
        props['species_list'] = []
    props['scale_down'] = params.get('scale_down', False)
    # env data is optional
    props['environmental_datasets'] = params.get('environmental_data', None)
    # NOTE(review): this only errors when env datasets are missing AND
    # both env_var_con and env_var_cat columns are mapped; an 'and'
    # between the two membership tests may have been intended — confirm
    if not (props['environmental_datasets']
            or 'env_var_con' not in props[
                'species_traits_dataset_params'].values()
            or 'env_var_cat' not in props[
                'species_traits_dataset_params'].values()):
        self.record_error('Bad Request', 400,
                          'No Environmental data selected',
                          {'parameter': 'environmental_datasets'})
    # optional modelling region: stored as a JSON blob when supplied
    if params.get('modelling_region', ''):
        props['modelling_region'] = NamedBlobFile(
            data=json.dumps(params['modelling_region']))
    else:
        props['modelling_region'] = None
    if not params.get('algorithms', None):
        self.record_error('Bad Request', 400,
                          'Missing parameter algorithms',
                          {'parameter': 'algorithms'})
    else:
        props['algorithms_species'] = {}
        props['algorithms_diff'] = {}
        # vocabularies of allowed algorithms per experiment flavour
        funcs_species = getUtility(
            IVocabularyFactory, 'traits_functions_species_source')(context)
        funcs_diff = getUtility(
            IVocabularyFactory, 'traits_functions_diff_source')(context)
        # FIXME: make sure we get the default values from our func object
        # merge caller-supplied parameters over the toolkit schema defaults
        for algo_uuid, algo_params in params['algorithms'].items():
            if algo_params is None:
                algo_params = {}
            toolkit = uuidToObject(algo_uuid)
            toolkit_model = loadString(toolkit.schema)
            toolkit_schema = toolkit_model.schema
            func_props = {}
            for field_name in toolkit_schema.names():
                field = toolkit_schema.get(field_name)
                value = algo_params.get(field_name, field.missing_value)
                if value == field.missing_value:
                    # not supplied — fall back to the schema default
                    func_props[field_name] = field.default
                else:
                    func_props[field_name] = value
            if algo_uuid in funcs_species:
                props['algorithms_species'][algo_uuid] = func_props
            elif algo_uuid in funcs_diff:
                props['algorithms_diff'][algo_uuid] = func_props
            else:
                LOG.warn(
                    'Algorithm {} not in allowed list of functions'.format(
                        toolkit.id))
        if not (props['algorithms_species'] or props['algorithms_diff']):
            self.record_error('Bad Request', 400,
                              'Invalid algorithms selected',
                              {'parameter': 'algorithms'})
    if self.errors:
        raise BadRequest("Validation Failed")
    # create experiment with data as form would do
    # TODO: make sure self.context is 'experiments' folder?
    from plone.dexterity.utils import createContent, addContentToContainer
    experiment = createContent(
        "org.bccvl.content.speciestraitsexperiment", **props)
    experiment = addContentToContainer(context, experiment)
    experiment.parameters = dict(props['algorithms_species'])
    experiment.parameters.update(dict(props['algorithms_diff']))
    # FIXME: need to get resolution from somewhere
    IBCCVLMetadata(experiment)['resolution'] = 'Resolution30m'
    # submit newly created experiment
    # TODO: handle background job submit .... at this stage we wouldn't
    # know the model run job ids
    # TODO: handle submit errors and other errors that may happen above?
    # generic exceptions could be handled in returnwrapper
    retval = {
        'experiment': {
            'url': experiment.absolute_url(),
            'uuid': IUUID(experiment)
        },
        'jobs': [],
    }
    jt = IExperimentJobTracker(experiment)
    msgtype, msg = jt.start_job(self.request)
    if msgtype is not None:
        retval['message'] = {
            'type': msgtype,
            'message': msg
        }
    # collect the job id of every result object created by the submit
    for result in experiment.values():
        jt = IJobTracker(result)
        retval['jobs'].append(jt.get_job().id)
    return retval
def pullOccurrenceFromALA(self, lsid, taxon, dataSrc='ala', common=None):
    """Create a species-occurrence dataset for *taxon* and queue its
    import job.

    The dataset is placed in the species sub-folder matching *dataSrc*
    ('ala', 'gbif', 'aekos' or 'obis'); any other source raises
    BadRequest. Returns the ``(status, message)`` tuple from the job
    submission.
    """
    # TODO: check permisions?
    # 1. create new dataset with taxon, lsid and common name set
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    if dataSrc == 'ala':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['ala']
    elif dataSrc == 'gbif':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['gbif']
    elif dataSrc == 'aekos':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['aekos']
    elif dataSrc == 'obis':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['obis']
    else:
        raise BadRequest('Invalid data source {0}'.format(dataSrc))
    # display title: "<taxon> (<common name>)" when a common name is given
    title = [taxon]
    if common:
        title.append(u"({})".format(common))
    # TODO: move content creation into IALAJobTracker?
    # use a remote dataset type when swift object storage is configured
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    if swiftsettings.storage_url:
        portal_type = 'org.bccvl.content.remotedataset'
    else:
        portal_type = 'org.bccvl.content.dataset'
    # TODO: make sure we get a better content id that dataset-x
    title = u' '.join(title)
    ds = createContent(portal_type, title=title)
    ds.dataSource = dataSrc  # Either ALA or GBIF as source
    # TODO: add number of occurences to description
    ds.description = u' '.join(
        (title, u'imported from', unicode(dataSrc.upper())))
    ds = addContentToContainer(dscontainer, ds)
    # attach BCCVL metadata describing the species
    md = IBCCVLMetadata(ds)
    # TODO: provenance ... import url?
    # FIXME: verify input parameters before adding to graph
    md['genre'] = 'DataGenreSpeciesOccurrence'
    md['species'] = {
        'scientificName': taxon,
        'taxonID': lsid,
    }
    if common:
        md['species']['vernacularName'] = common
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # 2. create and push alaimport job for dataset
    # TODO: make this named adapter
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # Job submission state notifier
    IStatusMessage(self.request).add(message, type=status)
    return (status, message)
def import_ala_data(self):
    """POST API endpoint: create a dataset from ALA query definitions
    and start the import job.

    Expects a ``data`` form parameter (a list of query dicts with
    ``url``/``query``/optional ``name``). Returns a dict with the job
    status, message and job id, and sets a 201 response with a
    Location header.

    Fix over the previous revision: a missing/empty ``data`` parameter
    now reports 'Missing parameter data' instead of a copy-pasted
    traits/environ message, and the unreachable duplicate validation
    branches after the raise were removed.
    """
    if self.request.get("REQUEST_METHOD", "GET").upper() != "POST":
        self.record_error("Request must be POST", 400)
        raise BadRequest("Request must be POST")
    context = None
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location
        context = self.context.restrictedTraverse(
            "/".join((defaults.DATASETS_FOLDER_ID,
                      defaults.DATASETS_SPECIES_FOLDER_ID, "ala"))
        )
    else:
        # custom context.... let's use in
        context = self.context
    # do user check first
    member = ploneapi.user.get_current()
    if member.getId():
        user = {
            "id": member.getUserName(),
            "email": member.getProperty("email"),
            "fullname": member.getProperty("fullname"),
        }
    else:
        # We need at least a valid user
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission("org.bccvl.AddDataset", context):
        raise Unauthorized("User not allowed in this context")
    params = self.request.form.get("data")
    if not params:
        # record for API error reporting, then abort the request
        self.record_error("Bad Request", 400, "Missing parameter data",
                          {"parameter": "data"})
        raise BadRequest("Missing parameter data")
    # TODO: should validate objects inside as well? (or use json schema
    # validation?)
    # all good so far
    # TODO: get better name here
    title = params[0].get("name", "ALA import")
    # determine dataset type
    # 1. test if it is a multi species import: count distinct species
    #    guids across all queries via the biocache facet search
    species = set()
    for query in params:
        biocache_url = "{}/occurrences/search".format(query["url"])
        query = {
            "q": query["query"],
            "pageSize": 0,
            "limit": 2,
            "facets": "species_guid",
            "fq": "species_guid:*",  # skip results without species guid
        }
        res = requests.get(biocache_url, params=query)
        res = res.json()
        # FIXME: do we need to treat sandbox downloads differently?
        if res["facetResults"]:  # do we have some results at all?
            for guid in res["facetResults"][0]["fieldResult"]:
                species.add(guid["label"])
    if len(species) > 1:
        portal_type = "org.bccvl.content.multispeciesdataset"
    else:
        portal_type = "org.bccvl.content.dataset"
        # use a remote dataset when swift object storage is configured
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
        if swiftsettings.storage_url:
            portal_type = "org.bccvl.content.remotedataset"
    # create content
    ds = createContentInContainer(context, portal_type, title=title)
    ds.dataSource = "ala"
    ds.description = u" ".join([title, u" imported from ALA"])
    ds.import_params = params
    md = IBCCVLMetadata(ds)
    if IMultiSpeciesDataset.providedBy(ds):
        md["genre"] = "DataGenreSpeciesCollection"
    else:
        # species dataset
        md["genre"] = "DataGenreSpeciesOccurrence"
        md["categories"] = ["occurrence"]
    # TODO: populate this correctly as well
    md["species"] = [{"scientificName": "qid", "taxonID": "qid"}]
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add("New Dataset created", type="info")
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    # 201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, "portal_url").getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader("Location", nexturl)
    # FIXME: should return a nice json representation of success or error
    return {"status": status, "message": message,
            "jobid": IJobTracker(ds).get_job().id}
def import_trait_data(self):
    """API endpoint: create a traits dataset from an AEKOS query and
    start the import job.

    Reads ``source``, ``species``, ``traits`` and ``environ`` from the
    request form. Returns a dict with the job status, message and job
    id, and sets a 201 response with a Location header.

    Bug fix: the source check used ``source not in ("aekos")`` —
    ``("aekos")`` is a plain string, so the test was a substring match
    and accepted values like ``"ek"``. It now uses a proper
    single-element tuple. Error-message typos fixed as well.
    """
    source = self.request.form.get("source", None)
    species = self.request.form.get("species", None)
    traits = self.request.form.get("traits", None)
    environ = self.request.form.get("environ", None)
    context = None
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location
        context = self.context.restrictedTraverse(
            "/".join((defaults.DATASETS_FOLDER_ID,
                      defaults.DATASETS_SPECIES_FOLDER_ID, "aekos"))
        )
    else:
        # custom context.... let's use in
        context = self.context
    # do user check first
    member = ploneapi.user.get_current()
    if member.getId():
        user = {
            "id": member.getUserName(),
            "email": member.getProperty("email"),
            "fullname": member.getProperty("fullname"),
        }
    else:
        # We need at least a valid user
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission("org.bccvl.AddDataset", context):
        raise Unauthorized("User not allowed in this context")
    # check parameters: only the 'aekos' source is supported
    if not source or source not in ("aekos",):
        raise BadRequest("source parameter must be 'aekos'")
    if not species or not isinstance(species, (basestring, list)):
        raise BadRequest("Missing or invalid species parameter")
    elif isinstance(species, basestring):
        species = [species]
    if not traits and not environ:
        raise BadRequest("At least one of traits or environ has to be set")
    # normalise traits/environ to lists
    if not traits:
        traits = []
    elif isinstance(traits, basestring):
        traits = [traits]
    if not environ:
        environ = []
    elif isinstance(environ, basestring):
        environ = [environ]
    # all good so far
    # pull dataset from aekos
    title = " ".join(species)
    # determine dataset type: remote when swift storage is configured
    portal_type = "org.bccvl.content.dataset"
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    if swiftsettings.storage_url:
        portal_type = "org.bccvl.content.remotedataset"
    # create content
    ds = createContentInContainer(context, portal_type, title=title)
    ds.dataSource = source
    ds.description = u" ".join(
        [title, ",".join(traits), ",".join(environ),
         u" imported from {}".format(source.upper())]
    )
    # attach BCCVL metadata
    md = IBCCVLMetadata(ds)
    md["genre"] = "DataGenreTraits"
    md["categories"] = ["traits"]
    md["species"] = [{"scientificName": spec, "taxonID": spec}
                     for spec in species]
    md["traits"] = traits
    md["environ"] = environ
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add("New Dataset created", type="info")
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    # 201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, "portal_url").getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader("Location", nexturl)
    # FIXME: should return a nice json representation of success or error
    return {"status": status, "message": message,
            "jobid": IJobTracker(ds).get_job().id}
def pullOccurrenceFromALA(self, lsid, taxon, dataSrc='ala', common=None):
    """Create an occurrence (or multi-species) dataset for *taxon* and
    queue its import job.

    Queries the source service (ALA biocache / GBIF / OBIS) to count
    distinct species for *lsid*; more than one species yields a
    multispecies dataset. Returns the ``(status, message)`` tuple from
    the job submission.
    """
    # TODO: check permisions?
    # 1. create new dataset with taxon, lsid and common name set
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    if dataSrc == 'ala':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['ala']
    elif dataSrc == 'gbif':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['gbif']
    elif dataSrc == 'aekos':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['aekos']
    elif dataSrc == 'obis':
        dscontainer = portal[defaults.DATASETS_FOLDER_ID][
            defaults.DATASETS_SPECIES_FOLDER_ID]['obis']
    else:
        raise BadRequest('Invalid data source {0}'.format(dataSrc))
    # display title: "<taxon> (<common name>)" when a common name is given
    title = [taxon]
    if common:
        title.append(u"({})".format(common))
    # determine dataset type
    # 1. test if it is a multi species import
    species = set()
    if dataSrc == 'ala':
        # facet search on species_guid to count distinct species
        params = [{
            'query': 'lsid:{}'.format(lsid),
            'url': 'http://biocache.ala.org.au/ws'
        }]
        for query in params:
            biocache_url = '{}/occurrences/search'.format(query['url'])
            query = {
                'q': query['query'],
                'pageSize': 0,
                'limit': 2,
                'facets': 'species_guid',
                'fq': 'species_guid:*'  # skip results without species guid
            }
            res = requests.get(biocache_url, params=query)
            res = res.json()
            if res.get('facetResults'):  # do we have some results at all?
                for guid in res['facetResults'][0]['fieldResult']:
                    species.add(guid['label'])
    elif dataSrc == 'gbif':
        # list child taxa of the given key; each speciesKey counts as one
        genusChildren_url = \
            'https://api.gbif.org/v1/species/{}/children?offset=0&limit=40'.format(lsid)
        res = requests.get(genusChildren_url)
        res = res.json()
        if res.get('results'):
            for sp in res.get('results'):
                if sp.get('speciesKey'):
                    species.add(sp['speciesKey'])
    elif dataSrc == 'obis':
        # list children and keep only entries with rank 'Species'
        genusChildren_url = \
            'https://backend.iobis.org/children/{}'.format(lsid)
        res = requests.get(genusChildren_url)
        res = res.json()
        for sp in res:
            if sp.get('rank_name', '') != 'Species':
                continue
            if sp.get('valid_id'):
                species.add(sp['valid_id'])
    if len(species) > 1:
        portal_type = 'org.bccvl.content.multispeciesdataset'
    else:
        # single species: remote dataset when swift storage is configured
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
        if swiftsettings.storage_url:
            portal_type = 'org.bccvl.content.remotedataset'
        else:
            portal_type = 'org.bccvl.content.dataset'
    # TODO: make sure we get a better content id that dataset-x
    title = u' '.join(title)
    ds = createContent(portal_type, title=title)
    ds.dataSource = dataSrc  # Either ALA or GBIF as source
    # TODO: add number of occurences to description
    ds.description = u' '.join(
        (title, u'imported from', unicode(dataSrc.upper()))
    )
    ds = addContentToContainer(dscontainer, ds)
    # attach BCCVL metadata describing the species / collection
    md = IBCCVLMetadata(ds)
    # TODO: provenance ... import url?
    # FIXME: verify input parameters before adding to graph
    if IMultiSpeciesDataset.providedBy(ds):
        md['genre'] = 'DataGenreSpeciesCollection'
        md['categories'] = ['multispecies']
    else:
        md['genre'] = 'DataGenreSpeciesOccurrence'
        md['categories'] = ['occurrence']
    md['species'] = {
        'scientificName': taxon,
        'taxonID': lsid,
    }
    if common:
        md['species']['vernacularName'] = common
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # 2. create and push alaimport job for dataset
    # TODO: make this named adapter
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # Job submission state notifier
    IStatusMessage(self.request).add(message, type=status)
    return (status, message)
def import_ala_data(self):
    """POST API endpoint: create a dataset (occurrence, traits or
    multi-species) from ALA query definitions and start the import job.

    Expects a ``data`` form parameter (a list of query dicts with
    ``url``/``query``/optional ``name``/``trait``). Returns a dict with
    the job status, message and job id, and sets a 201 response with a
    Location header.

    Fix over the previous revision: a missing/empty ``data`` parameter
    now reports 'Missing parameter data' instead of a copy-pasted
    traits/environ message, and the unreachable duplicate validation
    branches after the raise were removed.
    """
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    context = None
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location
        context = self.context.restrictedTraverse(
            "/".join((defaults.DATASETS_FOLDER_ID,
                      defaults.DATASETS_SPECIES_FOLDER_ID, 'ala')))
    else:
        # custom context.... let's use in
        context = self.context
    # do user check first
    member = ploneapi.user.get_current()
    if member.getId():
        user = {
            'id': member.getUserName(),
            'email': member.getProperty('email'),
            'fullname': member.getProperty('fullname')
        }
    else:
        # We need at least a valid user
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission('org.bccvl.AddDataset', context):
        raise Unauthorized("User not allowed in this context")
    params = self.request.form.get('data')
    if not params:
        # record for API error reporting, then abort the request
        self.record_error('Bad Request', 400, 'Missing parameter data',
                          {'parameter': 'data'})
        raise BadRequest('Missing parameter data')
    # TODO: should validate objects inside as well? (or use json schema
    # validation?)
    # all good so far
    # TODO: get better name here
    title = params[0].get('name', 'ALA import')
    # determine dataset type
    # 1. test if it is a multi species import: count distinct species
    #    guids across all queries via the biocache facet search
    species = set()
    for query in params:
        biocache_url = '{}/occurrences/search'.format(query['url'])
        query = {
            'q': query['query'],
            'pageSize': 0,
            'limit': 2,
            'facets': 'species_guid',
            'fq': 'species_guid:*'  # skip results without species guid
        }
        res = requests.get(biocache_url, params=query)
        res = res.json()
        # FIXME: do we need to treat sandbox downloads differently?
        if res.get('facetResults'):  # do we have some results at all?
            for guid in res['facetResults'][0]['fieldResult']:
                species.add(guid['label'])
    # Check if it is trait-data
    isTrait = any([p.get('trait', 0) for p in params])
    if not isTrait and len(species) > 1:
        portal_type = 'org.bccvl.content.multispeciesdataset'
    else:
        portal_type = 'org.bccvl.content.dataset'
        # use a remote dataset when swift object storage is configured
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
        if swiftsettings.storage_url:
            portal_type = 'org.bccvl.content.remotedataset'
    # create content
    ds = createContent(portal_type, title=title)
    ds.dataSource = 'ala'
    ds.description = u' '.join([title, u' imported from ALA'])
    ds.import_params = params
    ds = addContentToContainer(context, ds)
    md = IBCCVLMetadata(ds)
    if IMultiSpeciesDataset.providedBy(ds):
        md['genre'] = 'DataGenreSpeciesCollection'
        md['categories'] = ['multispecies']
    else:
        if isTrait:
            # Trait dataset
            md['genre'] = 'DataGenreTraits'
            md['categories'] = ['traits']
        else:
            # species dataset
            md['genre'] = 'DataGenreSpeciesOccurrence'
            md['categories'] = ['occurrence']
    # TODO: populate this correctly as well
    md['species'] = [{
        'scientificName': 'qid',
        'taxonID': 'qid'}]
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    # 201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader('Location', nexturl)
    # FIXME: should return a nice json representation of success or error
    return {
        'status': status,
        'message': message,
        'jobid': IJobTracker(ds).get_job().id
    }
def import_trait_data(self):
    """Create a traits dataset and start an import job for it.

    POST-only API endpoint. Reads ``source`` ('aekos' or 'zoatrack'),
    ``species``, ``traits``, ``environ`` and ``url`` from the request
    form, creates a (remote) dataset object in the matching species
    import folder and starts the import job.

    Returns a dict with job ``status``, ``message`` and ``jobid`` and
    sets a 201 response with a Location header.

    Raises BadRequest for invalid parameters and Unauthorized for
    anonymous users or missing add permission.
    """
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    source = self.request.form.get('source', None)
    species = self.request.form.get('species', None)
    traits = self.request.form.get('traits', None)
    environ = self.request.form.get('environ', None)
    dataurl = self.request.form.get('url', None)
    if not source or source not in ('aekos', 'zoatrack'):
        raise BadRequest("source parameter must be 'aekos' or 'zoatrack'")
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location for this source
        context = self.context.restrictedTraverse("/".join(
            (defaults.DATASETS_FOLDER_ID,
             defaults.DATASETS_SPECIES_FOLDER_ID,
             str(source))))
    else:
        # custom context.... let's use it as is
        context = self.context
    # do user check first: we need at least a valid (non-anonymous) user
    member = ploneapi.user.get_current()
    if not member.getId():
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission('org.bccvl.AddDataset', context):
        raise Unauthorized("User not allowed in this context")
    # check parameters
    if not species or not isinstance(species, (basestring, list)):
        raise BadRequest("Missing or invalid species parameter")
    elif isinstance(species, basestring):
        species = [species]
    # for zoatrack, url needs to be set
    if source == 'zoatrack' and not dataurl:
        raise BadRequest("url has to be set")
    # for aekos, at least a trait or environment variable must be specified
    if source == 'aekos' and not traits and not environ:
        raise BadRequest(
            "At least a trait or environment variable has to be set")
    # normalise traits / environ to lists
    if not traits:
        traits = []
    elif isinstance(traits, basestring):
        traits = [traits]
    if not environ:
        environ = []
    elif isinstance(environ, basestring):
        environ = [environ]
    # all good so far ... pull dataset from the remote source
    title = ' '.join(species)
    # determine dataset type; store remotely if swift is configured
    portal_type = 'org.bccvl.content.dataset'
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    if swiftsettings.storage_url:
        portal_type = 'org.bccvl.content.remotedataset'
    # create content
    ds = createContent(portal_type, title=title)
    ds.dataSource = source
    ds.description = u' '.join([
        title,
        ','.join(traits),
        ','.join(environ),
        u' imported from {}'.format(source.upper())
    ])
    ds = addContentToContainer(context, ds)
    md = IBCCVLMetadata(ds)
    md['genre'] = 'DataGenreTraits'
    md['categories'] = ['traits']
    md['species'] = [{
        'scientificName': spec,
        'taxonID': spec
    } for spec in species]
    md['traits'] = traits
    md['environ'] = environ
    md['dataurl'] = dataurl
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    #        201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader('Location', nexturl)
    # FIXME: should return a nice json representation of success or error
    return {
        'status': status,
        'message': message,
        'jobid': IJobTracker(ds).get_job().id
    }
def pullOccurrenceFromALA(self, lsid, taxon, dataSrc='ala', common=None):
    """Create an occurrence dataset for *lsid* and start an import job.

    :param lsid: taxon identifier at the remote data source
    :param taxon: scientific name, used for title and metadata
    :param dataSrc: one of 'ala', 'gbif', 'aekos' or 'obis'
    :param common: optional vernacular (common) name
    :returns: ``(status, message)`` tuple from the job tracker
    :raises BadRequest: if *dataSrc* is not a known source
    """
    # TODO: check permisions?
    # 1. create new dataset with taxon, lsid and common name set
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    # each supported source has a like-named sub folder below the
    # species datasets folder
    if dataSrc not in ('ala', 'gbif', 'aekos', 'obis'):
        raise BadRequest('Invalid data source {0}'.format(dataSrc))
    dscontainer = portal[defaults.DATASETS_FOLDER_ID][
        defaults.DATASETS_SPECIES_FOLDER_ID][dataSrc]

    title = [taxon]
    if common:
        title.append(u"({})".format(common))

    # determine dataset type:
    # 1. query the source to test if it is a multi species import
    species = set()
    if dataSrc == 'ala':
        params = [{
            'query': 'lsid:{}'.format(lsid),
            'url': 'https://biocache-ws.ala.org.au/ws'
        }]
        for param in params:
            biocache_url = '{}/occurrences/search'.format(param['url'])
            searchquery = {
                'q': param['query'],
                'pageSize': 0,
                'limit': 2,
                'facets': 'species_guid',
                'fq': 'species_guid:*'  # skip results without species guid
            }
            res = requests.get(biocache_url, params=searchquery)
            res = res.json()
            if res.get('facetResults'):
                # do we have some results at all?
                for guid in res['facetResults'][0]['fieldResult']:
                    species.add(guid['label'])
    elif dataSrc == 'gbif':
        genusChildren_url = 'https://api.gbif.org/v1/species/{}/children?offset=0&limit=40'.format(
            lsid)
        res = requests.get(genusChildren_url)
        res = res.json()
        if res.get('results'):
            for sp in res.get('results'):
                if sp.get('speciesKey'):
                    species.add(sp['speciesKey'])
    elif dataSrc == 'obis':
        genusChildren_url = 'https://backend.iobis.org/children/{}'.format(
            lsid)
        res = requests.get(genusChildren_url)
        res = res.json()
        # only count children of rank 'Species'
        for sp in res:
            if sp.get('rank_name', '') != 'Species':
                continue
            if sp.get('valid_id'):
                species.add(sp['valid_id'])
    if len(species) > 1:
        portal_type = 'org.bccvl.content.multispeciesdataset'
    else:
        # single species: store remotely if swift is configured
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
        if swiftsettings.storage_url:
            portal_type = 'org.bccvl.content.remotedataset'
        else:
            portal_type = 'org.bccvl.content.dataset'
    # TODO: make sure we get a better content id than dataset-x
    title = u' '.join(title)
    ds = createContent(portal_type, title=title)
    ds.dataSource = dataSrc  # source the import job will pull from
    # TODO: add number of occurences to description
    ds.description = u' '.join(
        (title, u'imported from', unicode(dataSrc.upper())))
    ds = addContentToContainer(dscontainer, ds)
    md = IBCCVLMetadata(ds)
    # TODO: provenance ... import url?
    # FIXME: verify input parameters before adding to graph
    if IMultiSpeciesDataset.providedBy(ds):
        md['genre'] = 'DataGenreSpeciesCollection'
        md['categories'] = ['multispecies']
    else:
        md['genre'] = 'DataGenreSpeciesOccurrence'
        md['categories'] = ['occurrence']
    md['species'] = {
        'scientificName': taxon,
        'taxonID': lsid,
    }
    if common:
        md['species']['vernacularName'] = common
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # 2. create and push alaimport job for dataset
    # TODO: make this named adapter
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # Job submission state notifier
    IStatusMessage(self.request).add(message, type=status)
    return (status, message)
def import_ala_data(self):
    """Create a dataset from ALA biocache queries and start an import job.

    POST-only API endpoint. Expects a ``data`` form parameter: a list of
    query dicts with ``url`` (biocache service base) and ``query`` keys.
    Probes the biocache facet API to decide between a single- and a
    multi-species dataset, creates the content object and starts the
    import job.

    Returns a dict with job ``status``, ``message`` and ``jobid`` and
    sets a 201 response with a Location header.

    Raises BadRequest for invalid parameters and Unauthorized for
    anonymous users or missing add permission.
    """
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location
        context = self.context.restrictedTraverse("/".join(
            (defaults.DATASETS_FOLDER_ID,
             defaults.DATASETS_SPECIES_FOLDER_ID,
             'ala')))
    else:
        # custom context.... let's use it as is
        context = self.context
    # do user check first: we need at least a valid (non-anonymous) user
    member = ploneapi.user.get_current()
    if not member.getId():
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission('org.bccvl.AddDataset', context):
        raise Unauthorized("User not allowed in this context")
    # validate the 'data' parameter; record the error before raising so
    # the API error log matches the response
    params = self.request.form.get('data')
    if params is None:
        self.record_error('Bad Request', 400, 'Missing parameter data',
                          {'parameter': 'data'})
        raise BadRequest('Missing parameter data')
    if not params:
        self.record_error('Bad Request', 400, 'Empty parameter data',
                          {'parameter': 'data'})
        raise BadRequest('Empty parameter data')
    # TODO: should validate objects inside as well? (or use json schema
    # validation?)
    # all good so far
    # pull dataset from aekos
    # TODO: get better name here
    title = params[0].get('name', 'ALA import')
    # determine dataset type
    # 1. test if it is a multi species import
    species = set()
    for query in params:
        biocache_url = '{}/occurrences/search'.format(query['url'])
        searchquery = {
            'q': query['query'],
            'pageSize': 0,
            'limit': 2,
            'facets': 'species_guid',
            'fq': 'species_guid:*'  # skip results without species guid
        }
        res = requests.get(biocache_url, params=searchquery)
        res = res.json()
        # FIXME: do we need to treat sandbox downloads differently?
        if res.get('facetResults'):
            # do we have some results at all?
            for guid in res['facetResults'][0]['fieldResult']:
                species.add(guid['label'])
    if len(species) > 1:
        portal_type = 'org.bccvl.content.multispeciesdataset'
    else:
        # NOTE(review): swift override assumed to apply only to
        # single-species datasets (matches pullOccurrenceFromALA) —
        # confirm against original indentation
        portal_type = 'org.bccvl.content.dataset'
        swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
        if swiftsettings.storage_url:
            portal_type = 'org.bccvl.content.remotedataset'
    # create content
    ds = createContent(portal_type, title=title)
    ds.dataSource = 'ala'
    ds.description = u' '.join([title, u' imported from ALA'])
    ds.import_params = params
    ds = addContentToContainer(context, ds)
    md = IBCCVLMetadata(ds)
    if IMultiSpeciesDataset.providedBy(ds):
        md['genre'] = 'DataGenreSpeciesCollection'
        md['categories'] = ['multispecies']
    else:
        # species dataset
        md['genre'] = 'DataGenreSpeciesOccurrence'
        md['categories'] = ['occurrence']
        # TODO: populate this correctly as well
        md['species'] = [{'scientificName': 'qid', 'taxonID': 'qid'}]
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    #        201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader('Location', nexturl)
    # FIXME: should return a nice json representation of success or error
    return {
        'status': status,
        'message': message,
        'jobid': IJobTracker(ds).get_job().id
    }
def import_trait_data(self):
    """Create a traits dataset and start an import job for it.

    POST-only API endpoint. Reads ``source`` ('aekos' or 'zoatrack'),
    ``species``, ``traits``, ``environ`` and ``url`` from the request
    form, creates a (remote) dataset object in the matching species
    import folder and starts the import job.

    Returns a dict with job ``status``, ``message`` and ``jobid`` and
    sets a 201 response with a Location header.

    Raises BadRequest for invalid parameters and Unauthorized for
    anonymous users or missing add permission.
    """
    if self.request.get('REQUEST_METHOD', 'GET').upper() != 'POST':
        self.record_error('Request must be POST', 400)
        raise BadRequest('Request must be POST')
    source = self.request.form.get('source', None)
    species = self.request.form.get('species', None)
    traits = self.request.form.get('traits', None)
    environ = self.request.form.get('environ', None)
    dataurl = self.request.form.get('url', None)
    if not source or source not in ('aekos', 'zoatrack'):
        raise BadRequest("source parameter must be 'aekos' or 'zoatrack'")
    # get import context
    if ISiteRoot.providedBy(self.context):
        # we have been called at site root... let's traverse to default
        # import location for this source
        context = self.context.restrictedTraverse(
            "/".join((defaults.DATASETS_FOLDER_ID,
                      defaults.DATASETS_SPECIES_FOLDER_ID,
                      str(source))))
    else:
        # custom context.... let's use it as is
        context = self.context
    # do user check first: we need at least a valid (non-anonymous) user
    member = ploneapi.user.get_current()
    if not member.getId():
        raise Unauthorized("Invalid user")
    # check permission
    if not checkPermission('org.bccvl.AddDataset', context):
        raise Unauthorized("User not allowed in this context")
    # check parameters
    if not species or not isinstance(species, (basestring, list)):
        raise BadRequest("Missing or invalid species parameter")
    elif isinstance(species, basestring):
        species = [species]
    # for zoatrack, url needs to be set
    if source == 'zoatrack' and not dataurl:
        raise BadRequest("url has to be set")
    # for aekos, at least a trait or environment variable must be specified
    if source == 'aekos' and not traits and not environ:
        raise BadRequest(
            "At least a trait or environment variable has to be set")
    # normalise traits / environ to lists
    if not traits:
        traits = []
    elif isinstance(traits, basestring):
        traits = [traits]
    if not environ:
        environ = []
    elif isinstance(environ, basestring):
        environ = [environ]
    # all good so far ... pull dataset from the remote source
    title = ' '.join(species)
    # determine dataset type; store remotely if swift is configured
    portal_type = 'org.bccvl.content.dataset'
    swiftsettings = getUtility(IRegistry).forInterface(ISwiftSettings)
    if swiftsettings.storage_url:
        portal_type = 'org.bccvl.content.remotedataset'
    # create content
    ds = createContent(portal_type, title=title)
    ds.dataSource = source
    ds.description = u' '.join([
        title,
        ','.join(traits),
        ','.join(environ),
        u' imported from {}'.format(source.upper())])
    ds = addContentToContainer(context, ds)
    md = IBCCVLMetadata(ds)
    md['genre'] = 'DataGenreTraits'
    md['categories'] = ['traits']
    md['species'] = [{
        'scientificName': spec,
        'taxonID': spec} for spec in species]
    md['traits'] = traits
    md['environ'] = environ
    md['dataurl'] = dataurl
    # FIXME: IStatusMessage should not be in API call
    from Products.statusmessages.interfaces import IStatusMessage
    IStatusMessage(self.request).add('New Dataset created', type='info')
    # start import job
    jt = IExperimentJobTracker(ds)
    status, message = jt.start_job()
    # reindex object to make sure everything is up to date
    ds.reindexObject()
    # FIXME: IStatusMessage should not be in API call
    IStatusMessage(self.request).add(message, type=status)
    # FIXME: API should not return a redirect
    #        201: new resource created ... location may point to resource
    from Products.CMFCore.utils import getToolByName
    portal = getToolByName(self.context, 'portal_url').getPortalObject()
    nexturl = portal[defaults.DATASETS_FOLDER_ID].absolute_url()
    self.request.response.setStatus(201)
    self.request.response.setHeader('Location', nexturl)
    # FIXME: should return a nice json representation of success or error
    return {
        'status': status,
        'message': message,
        'jobid': IJobTracker(ds).get_job().id
    }