def setCitation(pkg):
    """Build the dataset citation string and store it in the 'Citation' extra.

    Citation shape: Authors. Year. Title. Availability note. DOI.
    Only the pieces actually present on the package are included.

    :param pkg: CKAN package dict (mutated via setPackageExtra)
    """
    citation = []
    title = pkg.get('title')
    authors = pkg.get('author')
    year = getPackageExtra('Year', pkg)

    # prefer the EcoSIS-minted DOI; fall back to a user-supplied citation DOI
    doi = getPackageExtra('EcoSIS DOI', pkg)
    if doi is None or doi == '':
        doi = getPackageExtra('Citation DOI', pkg)

    if authors is not None:
        # the author field is a comma-separated list; normalize whitespace.
        # (list comprehension instead of map() so len() below works on
        # Python 3 as well as Python 2)
        authors = [a.strip() for a in authors.split(',')]
        if len(authors) == 1:
            citation.append(authors[0])
        elif len(authors) == 2:
            citation.append(' and '.join(authors))
        elif len(authors) > 2:
            # "A, B and C" style for three or more authors
            last = authors.pop()
            citation.append('%s and %s' % (', '.join(authors), last))

    if year is not None:
        citation.append(year)
    if title is not None:
        citation.append(title)

    citation.append('Data set. Available on-line [http://ecosis.org] from the Ecosystem Spectral Information System (EcoSIS)')

    # bug fix: previously an empty-string fallback DOI was still appended,
    # leaving a dangling '. ' at the end of the citation
    if doi is not None and doi != '':
        citation.append(doi)

    citation = '. '.join(citation)
    setPackageExtra('Citation', citation, pkg)
def getDoiStatus(pkg):
    """Return the package DOI state as ``{'status': dict, 'value': str-or-None}``.

    The 'EcoSIS DOI Status' extra is persisted as a JSON string; a missing
    or blank value decodes to an empty dict.
    """
    raw_status = getPackageExtra('EcoSIS DOI Status', pkg)
    doi_value = getPackageExtra('EcoSIS DOI', pkg)

    if raw_status is None or raw_status == "":
        parsed_status = {}
    else:
        parsed_status = json.loads(raw_status)

    return {
        'status': parsed_status,
        'value': doi_value,
    }
def process():
    """Controller action: (re)process one or more package resources.

    Reads package/resource/sheet ids plus an optional JSON 'options' blob
    from the request, whitelists options against parseOptions, asks the
    workspace to prepare each file, then bumps the package's
    'resourceUpdateCount' extra so metadata_modified changes (this drives
    the 'updated since last pushed' UI check).

    Returns a JSON string {'metadata_modified': ..., 'result': ...}, or an
    error dict when the package has an applied DOI.
    """
    response.headers["Content-Type"] = "application/json"
    package_id = request.params.get('package_id')
    # authorization guard -- presumably aborts/raises on failure; verify in hasAccess
    hasAccess(package_id)
    # packages with an applied (minted) DOI are frozen and cannot be edited
    if hasAppliedDoi(package_id):
        return {'error':True, 'message':'Cannot edit resource of package with applied DOI'}
    sheet_id = request.params.get('sheet_id')
    resource_id = request.params.get('resource_id')
    ids = request.params.get('resource_ids')
    if sheet_id == "":
        sheet_id = None
    # 'options' is optional JSON; any parse failure falls back to {}
    try:
        options = json.loads(request.params.get('options'))
    except:
        options = {}

    # option, if a resource id and a datasheet id are passed, then the full 'merged' view will be return
    # only allow specified options
    safeOptions = {}
    for option in parseOptions:
        if option in options:
            safeOptions[option] = options[option]

    # see if we are editing multiple files or just one
    result = []
    if ids is not None:
        ids = json.loads(ids)
        for resource_id in ids:
            workspace.prepareFile(package_id, resource_id, sheet_id, safeOptions)
            result.append(query.getResource(resource_id))
    else:
        workspace.prepareFile(package_id, resource_id, sheet_id, safeOptions)
        result = query.getResource(resource_id, sheet_id)

    # update the dataset, so the metadata timestamp changes
    context = {'model': model, 'user': c.user}
    pkg = logic.get_action('package_show')(context, {'id': package_id})

    # use this counter to poke the dataset. This will update the last modified timestamps
    # required for 'updated since last pushed UI'
    resourceUpdateCount = utils.getPackageExtra('resourceUpdateCount', pkg)
    if resourceUpdateCount is None:
        resourceUpdateCount = 1
    else:
        resourceUpdateCount = int(resourceUpdateCount) + 1
    utils.setPackageExtra('resourceUpdateCount', resourceUpdateCount, pkg)

    pkg = logic.get_action('package_update')(context, pkg)

    result = {
        'metadata_modified' : pkg.get('metadata_modified'),
        'result' : result
    }
    return jsonStringify(result)
def processGeoJson(bboxInfo, pkg):
    """Build a GeoJSON GeometryCollection for a package.

    Combines (a) a bounding-box polygon derived from spectra lat/lng
    values when ``bboxInfo['use']`` is truthy, and (b) a user-supplied
    'geojson' package extra, when it parses as JSON.

    :param bboxInfo: dict with a 'use' flag plus minlat/maxlat/minlng/maxlng
    :param pkg: CKAN package dict
    :return: GeometryCollection dict (geometries may be empty)
    """
    result = {
        "type": "GeometryCollection",
        "geometries": []
    }

    # if we found bbox info in the spectra, add it as a closed polygon ring
    # (first coordinate repeated last, per the GeoJSON spec)
    if bboxInfo['use']:
        result['geometries'].append({
            "type": "Polygon",
            "coordinates": [[
                [bboxInfo["maxlng"], bboxInfo["maxlat"]],
                [bboxInfo["minlng"], bboxInfo["maxlat"]],
                [bboxInfo["minlng"], bboxInfo["minlat"]],
                [bboxInfo["maxlng"], bboxInfo["minlat"]],
                [bboxInfo["maxlng"], bboxInfo["maxlat"]]
            ]]
        })

    geojson = getPackageExtra("geojson", pkg)
    # idiom fix: 'is not None' instead of '!= None'
    if geojson is not None:
        try:
            # TODO: add checks for valid geojson
            # deliberately best-effort: unparsable user input is ignored
            result['geometries'].append(json.loads(geojson))
        except Exception:
            pass

    return result
def processAttribute(name, input, pkg, mrValue, setValues, keywords):
    """Resolve a single EcoSIS schema attribute and stage it for MongoDB.

    The value comes from (in priority order) the keyword list, an aliased
    first-class package field (via schemaMap), or a package extra. It is
    merged with any values the spectra map-reduce produced for the same
    attribute, cleaned, and written into ``setValues['$set']`` for the
    later ``collection.update()``.

    :param name: attribute name from the EcoSIS schema
    :param input: schema input type (e.g. 'controlled', 'split-text')
    :param pkg: CKAN package dict
    :param mrValue: map-reduced spectra values for this package
    :param setValues: MongoDB update document being built (mutated in place)
    :param keywords: pre-collected package tag names (used for 'Keywords')
    """
    val = None

    # first we need to get the values we are working with
    if name == 'Keywords':
        # this is the keywords attribute, special case: already a list
        val = keywords
    elif schemaMap.get(name) is not None:
        # if the schemaMap has an alias set, lookup value based on alias name
        val = pkg.get(schemaMap.get(name))
    else:
        # otherwise just use the provided attribute name
        val = getPackageExtra(name, pkg)

    # if we don't have values to process, do nothing
    if val is None or val == '':
        return

    # if attribute schema type is 'controlled', split to multiple values
    if name == 'Keywords':
        pass  # already a list of keyword strings
    elif input == "controlled" or input == "split-text" or name == 'Author':
        val = val.split(",")
    else:
        # we store everything as an array, easier to handle on other end
        val = [val]

    # now we have a dataset value, see if we have a spectra value and join if we do.
    # Spectra resource attributes were mapreduced into the single 'mrValue' dict.
    # If the attribute name is found as a first-class citizen, then it was
    # provided by the spectra and we need to include it.
    if mrValue.get(name) is not None:
        spValues = mrValue.get(name)
        # merge the above values with the new values, keeping them unique
        for v in val:
            if v not in spValues:
                spValues.append(v)
        val = spValues

    # finally, clean all values (strip and set to lower case).
    # list comprehension instead of map(lambda ...) so the stored value is a
    # real list on Python 3 as well (a lazy map object would not serialize)
    if name != 'geojson' and name != 'Citation':
        val = [cleanValue(it) for it in val]

    setValues['$set']['value.'+name] = val
def updateEcosisNs(pkg, spectra_count, bboxInfo):
    """Rebuild the 'ecosis' namespace document for a package in MongoDB.

    Gathers CKAN package metadata (title, org, license, resources, tags,
    aliases, DOI, sort config, geojson), merges it with the map-reduced
    spectra values already stored in the 'search_package' collection, and
    writes everything back with a single collection.update().

    :param pkg: CKAN package dict
    :param spectra_count: number of spectra associated with the package
    :param bboxInfo: bounding-box info dict consumed by processGeoJson
    """
    # get the package workspace object, contains config
    config = collections.get("package").find_one({"packageId": pkg.get("id")})
    if config is None:
        config = {}

    collection = collections.get('search_package')

    # the 'sort' extra is persisted as a JSON string; default to {}
    sort = getPackageExtra("sort", pkg)
    if sort is None:
        sort = {}
    else:
        sort = json.loads(sort)

    # store these as dates (best-effort parse; missing/invalid stays None)
    created = None
    modified = None
    try:
        created = dateparser.parse(pkg.get("metadata_created"))
    except Exception as e:
        pass
    try:
        modified = dateparser.parse(pkg.get("metadata_modified"))
    except Exception as e:
        pass

    # default ecosis information we are going to add to the package
    ecosis = {
        # TODO: change to ISO string, check this doesn't break 'updated since last push check'
        "pushed" : datetime.datetime.utcnow(),
        "organization" : "",
        "organization_id" : "",
        "description" : pkg.get('notes'),
        "groups" : [],
        "package_id" : pkg.get("id"),
        "package_name" : pkg.get("name"),
        "package_title" : pkg.get("title"),
        "created" : created,
        "modified" : modified,
        "version" : pkg.get("version"),
        "license" : pkg.get("license_title"),
        "spectra_count" : spectra_count,
        "spectra_metadata_schema" : {
            "wavelengths" : [],
            "metadata" : [],
            "units" : {},
            "aliases" : None,
        },
        "resources" : [],
        "linked_data" : [],
        "geojson" : None,
        "sort_on" : sort.get("on"),
        "sort_type": sort.get("type"),
        "sort_description" : sort.get("description")
    }

    # append the units
    units = query.allUnits(pkg.get("id"))
    if units != None:
        ecosis["spectra_metadata_schema"]["units"] = units

    # append the linked data (stored as a JSON string in the extras)
    linkeddata = getPackageExtra('LinkedData', pkg)
    if linkeddata != None:
        ecosis["linked_data"] = json.loads(linkeddata)

    # append the EcoSIS DOI
    doi = getPackageExtra('EcoSIS DOI', pkg)
    if doi != None:
        ecosis["doi"] = doi

    # append the list of resources (active ones only)
    for item in pkg['resources']:
        if item.get("state") != "active":
            continue
        ecosis["resources"].append({
            "type" : item.get('url_type'),
            "mimetype" : item.get("mimetype"),
            "name" : item.get("name"),
            "url" : item.get("url")
        })

    # append the list of keywords (CKAN tag display names)
    keywords = []
    for item in pkg['tags']:
        keywords.append(item['display_name'])

    # add metadata aliases (JSON string extra); best-effort, ignored on parse error
    aliases = getPackageExtra('aliases', pkg)
    if aliases is not None:
        try:
            ecosis["spectra_metadata_schema"]["aliases"] = json.loads(aliases)
            # map units for aliases: an alias inherits the unit of the
            # attribute it points at
            for key, value in ecosis["spectra_metadata_schema"]["aliases"].iteritems():
                unit = ecosis["spectra_metadata_schema"]["units"].get(value)
                if unit is not None:
                    ecosis["spectra_metadata_schema"]["units"][key] = unit
        except Exception:
            pass

    # append the data groups
    for item in pkg['groups']:
        ecosis["groups"].append(item['display_name'])

    # append the organizations (fall back to the literal string 'None')
    if 'organization' in pkg:
        if pkg['organization'] != None:
            ecosis["organization"] = pkg['organization']['title']
            ecosis["organization_id"] = pkg['organization']['id']
        else:
            ecosis['organization'] = 'None'
    else:
        ecosis['organization'] = 'None'

    # make sure the map reduce did not create a null collection, if so, remove.
    # This means there is no spectra
    item = collection.find_one({'_id': pkg['id']})

    # now see if we have a group by attribute...
    if item is None:
        # no map-reduce document at all: nothing to update
        pass
    elif item.get('value') is None:
        # empty map-reduce result: drop the stale document
        collection.remove({'_id': pkg['id']})
    else:
        # NOTE(review): this find_one repeats the lookup above — presumably
        # redundant; confirm before simplifying
        item = collection.find_one({'_id': pkg['id']})

        # the update document we will build up and apply at the end
        setValues = {'$set' : { 'value.ecosis': ecosis }, '$unset' : {}}

        # grab the mapreduce value
        mrValue = item.get('value')

        # process ecosis schema
        # bubble attributes from mapreduce
        names = []
        for category, items in schema.iteritems():
            for item in items:
                name = item.get('name')
                input = item.get('input')
                # ignore these attributes, they should not be processed.
                # TODO: make this a global list
                if name == 'Latitude' or name == 'Longitude' or name == 'geojson' or name == 'NASA GCMD Keywords':
                    continue
                # processAttribute does all sorts-o-stuff, see function definition below
                processAttribute(name, input, pkg, mrValue, setValues, keywords)
                names.append(name)
                if item.get('allowOther') == True:
                    processAttribute(name+" Other", "split-text", pkg, mrValue, setValues, keywords)
                    names.append(name+" Other")

        # set the known data attributes.
        # the mapreduce function created these objects, storing all known wavelengths
        # and metadata attributes for the function. Here we transform those objects
        # (dicts) into arrays, we only care about the keys.
        # Finally, MongoDB does not allow '.' in attribute names, so names were stored
        # with commas instead; transpose the ',' back to '.'
        for key in mrValue['tmp__schema__']['wavelengths']:
            ecosis['spectra_metadata_schema']['wavelengths'].append(re.sub(r',', '.', key))
        for key in mrValue['tmp__schema__']['metadata']:
            ecosis['spectra_metadata_schema']['metadata'].append(re.sub(r',', '.', key))

        # tell MongoDB to remove the object storing our schema information processed above
        setValues['$unset']['value.tmp__schema__'] = ''

        # append the gcmd keywords (JSON-string extra of labeled hierarchies)
        gcmd = getPackageExtra('NASA GCMD Keywords', pkg)
        if gcmd is not None and gcmd != '':
            arr = json.loads(gcmd)
            setValues['$set']['value.ecosis']['nasa_gcmd_keywords'] = arr

            # create unique array of all gcmd keywords to be searched on
            # (labels are '>'-separated paths; each segment becomes a keyword)
            keywords = []
            for item in arr:
                parts = item.get('label').split('>')
                parts = map(unicode.strip, parts)
                for key in parts:
                    if key not in keywords:
                        keywords.append(key)
            setValues['$set']['value.NASA GCMD Keywords'] = keywords

        # finally, let's handle geojson
        geojson = processGeoJson(bboxInfo, pkg);
        if len(geojson.get('geometries')) == 0:
            setValues['$set']['value.ecosis']['geojson'] = None
        else:
            setValues['$set']['value.ecosis']['geojson'] = geojson

        # really, finally, update the collection with the 'setValues' dict we have been creating
        collection.update(
            {'_id': pkg['id']},
            setValues
        )