Example #1
def update_user_tags(user):
    '''Function to update the user tags from their home node.'''

    if user.profile.openid() is not None:

        openid = user.profile.openid()
        url = "http://%s/share/user/?openid=%s" % (user.profile.site.domain,
                                                   user.profile.openid())
        print 'Updating user tags: querying URL=%s' % url
        jobj = getJson(url)

        if jobj is not None and openid in jobj['users'] and 'project_tags' in jobj['users'][openid]:

            # loop over tags found on user home node
            tags = []
            for tagName in jobj['users'][openid]['project_tags']:
                try:
                    tags.append(ProjectTag.objects.get(name__iexact=tagName))
                except ObjectDoesNotExist:
                    pass  # tag not found in local database

            # store tags in local user profile
            userProfile = UserProfile.objects.get(id=user.profile.id)
            userProfile.tags.clear()
            userProfile.tags = tags
            userProfile.save()
            transaction.commit()
            print 'User: %s updated for tags: %s' % (user, tags)


# NOTE: connecting the login signal is not needed because every time the user logs in,
# the session is refreshed and updating of projects is triggered already by the CoG session middleware
#user_logged_in.connect(update_user_projects_at_login)
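All of the snippets on this page call a getJson helper that is not shown. A minimal sketch of the contract they rely on, assuming the requests library (the actual CoG implementation may differ): any network error, non-2xx status, or unparseable body must be swallowed and returned as None, since every caller guards with a 'jobj is not None' check.

import requests

def getJson(url, timeout=10):
    '''Fetches the given URL and returns the parsed JSON object, or None on any error.'''
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # raise on HTTP 4xx/5xx
        return response.json()
    except (requests.RequestException, ValueError):
        # network error, bad HTTP status, or a body that is not valid JSON
        return None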
Example #2
def update_user_tags(user):
    '''Function to update the user tags from their home node.'''
    
    if user.profile.openid() is not None:
        
        openid = user.profile.openid()
        url = "http://%s/share/user/?openid=%s" % (user.profile.site.domain, user.profile.openid())
        print 'Updating user tags: querying URL=%s' % url
        jobj = getJson(url)
        
        if jobj is not None and openid in jobj['users'] and 'project_tags' in jobj['users'][openid]:
            
            # loop over tags found on user home node
            tags = []
            for tagName in jobj['users'][openid]['project_tags']:
                try:
                    tags.append(ProjectTag.objects.get(name__iexact=tagName))
                except ObjectDoesNotExist:
                    pass  # tag not found in local database
            
            # store tags in local user profile
            userProfile = UserProfile.objects.get(id=user.profile.id)
            userProfile.tags.clear()
            userProfile.tags = tags
            userProfile.save()
            transaction.commit()
            print 'User: %s updated for tags: %s' % (user, tags)
    
# NOTE: connecting the login signal is not needed because every time the user logs in,
# the session is refreshed and updating of projects is triggered already by the CoG session middleware
#user_logged_in.connect(update_user_projects_at_login)
Example #3
    def sync(self):
        '''Updates the list of remote projects from all peer sites.'''

        # loop over peer sites
        sites = {}
        totalNumberOfProjects = 0
        totalNumberOfUsers = 0

        # loop over federated peer site + local site
        allSites = getPeerSites()
        local_site = Site.objects.get_current()
        allSites.append(local_site)
        for site in allSites:
            url = "http://%s/share/projects/" % site.domain
            numberOfUsers = 0
            numberOfProjects = 0
            jobj = getJson(url)
            if jobj is None:
                status = 'ERROR'
            else:
                status = 'OK'
                numberOfProjects = len(jobj["projects"])
                numberOfUsers = int(jobj.get("users", 0))
                if site != local_site:
                    # harvest projects, tags from remote site
                    self._harvest(jobj)

            sites[site.id] = {'name': site.name, 'domain': site.domain, 'url': url, 'status': status,
                              'numberOfProjects': numberOfProjects, 'numberOfUsers': numberOfUsers}

            totalNumberOfProjects += numberOfProjects
            totalNumberOfUsers += numberOfUsers

        return sites, totalNumberOfProjects, totalNumberOfUsers
Example #4
def ac_list(request):
    """
    View to display all access control groups that may be used to restrict data access.
    This view is intentionally open to the public (for now).
    """

    # loop over local node + peer nodes
    groups = {}
    sites = [Site.objects.get_current()] + getPeerSites()

    for site in sites:
        url = "http://%s/share/groups/" % site.domain
        jobj = getJson(url)
        if jobj is not None:  # no error in fetching URL
            site_name = jobj["site"]["name"]
            site_domain = jobj["site"]["domain"]

            # loop over groups for this node
            for group_name, group_dict in jobj["groups"].items():
                # augment group dictionary
                group_dict["site_name"] = site_name
                group_dict["site_domain"] = site_domain
                groups[group_name] = group_dict

    # order groups by name
    _groups = OrderedDict(sorted(groups.items()))

    # remove ESGF root group
    if "wheel" in _groups:
        del _groups["wheel"]

    return render(
        request, "cog/access_control/list.html", {"groups": _groups, "title": "ESGF Data Access Control Groups"}
    )
Example #5
def ac_list(request):
    """
    View to display all access control groups that may be used to restrict data access.
    This view is intentionally open to the public (for now).
    """

    # loop over local node + peer nodes
    groups = {}
    sites = [Site.objects.get_current()] + getPeerSites()

    for site in sites:
        url = "http://%s/share/groups/" % site.domain
        jobj = getJson(url)
        if jobj is not None:  # no error in fetching URL
            site_name = jobj['site']['name']
            site_domain = jobj['site']['domain']

            # loop over groups for this node
            for group_name, group_dict in jobj['groups'].items():
                # augment group dictionary
                group_dict['site_name'] = site_name
                group_dict['site_domain'] = site_domain
                groups[group_name] = group_dict

    # order groups by name
    _groups = OrderedDict(sorted(groups.items()))

    # remove ESGF root group
    if 'wheel' in _groups:
        del _groups['wheel']

    return render(request, 'cog/access_control/list.html', {
        'groups': _groups,
        'title': 'ESGF Data Access Control Groups'
    })
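For reference, ac_list only depends on the following JSON shape from each node's /share/groups/ endpoint. This is inferred from the key accesses above; the site and group names are sample values reused from Example #13, and the per-group fields are left elided.

{
    "site": {
        "name": "NOAA ESRL ESGF-CoG",
        "domain": "cog-esgf.esrl.noaa.gov"
    },
    "groups": {
        "HIWPP": { ... },
        "NCPP DIP": { ... }
    }
}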
Example #6
def discoverSiteForUser(openid):
    '''IMPORTANT: call this function ONLY at account creation as it makes requests to all peer nodes.'''

    for site in getPeerSites():  # loop over enabled peer nodes
        url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
        jobj = getJson(url)
        if jobj is not None:
            for key, value in jobj['users'].items():
                if str(value['home_site_domain']) == site.domain:
                    return site  # node found

    # node not found
    return None
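A hedged usage sketch for discoverSiteForUser; the openid is the sample value from Example #13, not a real account:

# hypothetical call at account creation time
site = discoverSiteForUser('https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin')
if site is not None:
    print('User home node: %s' % site.domain)
else:
    print('User home node not found on any peer')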
Example #7
def discoverSiteForUser(openid):
    '''IMPORTANT: call this function ONLY at account creation as it makes requests to all peer nodes.'''
        
    for site in getPeerSites():  # loop over enabled peer nodes
        url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
        jobj = getJson(url)
        if jobj is not None:
            for key, value in jobj['users'].items():
                if str(value['home_site_domain']) == site.domain:
                    return site  # node found
            
    # node not found
    return None
Example #8
def getDataCartsForUser(openid):

    dcs = {}  # dictionary of (site, datacart_size) items, keyed by Site object

    #for site in Site.objects.all():  # loop over all sites (e.g. nodes) in database. Note: includes current node
    for site in getPeerSites():  # loop over nodes that are federated
        url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
        print 'Querying for datacart: url=%s' % url
        jobj = getJson(url)
        if jobj is not None:
            for key, value in jobj['users'].items():
                dcs[site] = int(value['datacart']['size'])

    return dcs
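Since the keys of the returned dictionary are Site objects, callers can use site.domain directly. A hedged usage sketch, again with the sample openid from Example #13:

# hypothetical usage: report the data cart size on each federated peer
dcs = getDataCartsForUser('https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin')
for site, size in dcs.items():
    print('%s: datacart size=%d' % (site.domain, size))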
Example #9
def getDataCartsForUser(openid):
        
    dcs = {}  # dictionary of (site, datacart_size) items, keyed by Site object
    
    #for site in Site.objects.all():  # loop over all sites (e.g. nodes) in database. Note: includes current node
    for site in getPeerSites():  # loop over nodes that are federated
        url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
        print 'Querying for datacart: url=%s' % url
        jobj = getJson(url)
        if jobj is not None:
            for key, value in jobj['users'].items():
                dcs[site] = int(value['datacart']['size'])
            
    return dcs
Example #10
    def sync(self):
        '''Updates the list of remote projects from all peer sites.'''

        # loop over peer sites
        sites = {}
        totalNumberOfProjects = 0
        totalNumberOfUsers = 0

        # loop over federated peer site + local site
        allSites = getPeerSites()
        local_site = Site.objects.get_current()
        allSites.append(local_site)
        for site in allSites:
            url = "http://%s/share/projects/" % site.domain
            numberOfUsers = 0
            numberOfProjects = 0
            jobj = getJson(url)
            if jobj is None:
                status = 'ERROR'
            else:
                status = 'OK'
                numberOfProjects = len(jobj["projects"])
                numberOfUsers = int(jobj.get("users", 0))
                if site != local_site:
                    # harvest projects, tags from remote site
                    self._harvest(jobj)

            sites[site.id] = {
                'name': site.name,
                'domain': site.domain,
                'url': url,
                'status': status,
                'numberOfProjects': numberOfProjects,
                'numberOfUsers': numberOfUsers
            }

            totalNumberOfProjects += numberOfProjects
            totalNumberOfUsers += numberOfUsers

        return sites, totalNumberOfProjects, totalNumberOfUsers
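A hedged call-site sketch for sync; the enclosing class is not shown in these examples, so the registry instance name is an assumption:

# hypothetical call site, e.g. a periodic task or an admin status page
sites, totalNumberOfProjects, totalNumberOfUsers = registry.sync()
for site_id, info in sites.items():
    print('%s (%s): status=%s projects=%s users=%s' % (
        info['name'], info['domain'], info['status'],
        info['numberOfProjects'], info['numberOfUsers']))
print('Totals: projects=%s users=%s' % (totalNumberOfProjects, totalNumberOfUsers))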
Example #11
def download(request):
	'''
	View that initiates the Globus download workflow by collecting and optionally sub-selecting the GridFTP URLs to be downloaded.
	This view can be invoked via GET (link from search page, one dataset only) or POST (link from data cart page, multiple datasets at once).
	Example URL: http://localhost:8000/globus/download/
	             ?dataset=obs4MIPs.NASA-JPL.AIRS.mon.v1%[email protected],obs4MIPs.NASA-JPL.MLS.mon.v1%[email protected]
	             &method=web
	'''
		
	# retrieve request parameters
	datasets = getQueryDict(request).get('dataset','').split(",")
	# optional query filter
	query = getQueryDict(request).get('query',None)
	# maximum number of files to query for, if specified
	limit = request.GET.get('limit', DOWNLOAD_LIMIT)
	
	# map of (data_node, list of GridFTP URLs to download)
	download_map = {}
	
	# loop over requested datasets
	for dataset in datasets:
		
		# query each index_node for all files belonging to that dataset
		(dataset_id, index_node) = str(dataset).split('@')
		
		params = [('type', "File"), ('dataset_id', dataset_id),
		          ('offset', '0'), ('limit', limit), ('fields', 'url'), ("format", "application/solr+json")]

		if query is not None and len(query.strip()) > 0:
			params.append(('query', query))
			
		# optional shard
		shard = request.GET.get('shard', '')
		if shard is not None and len(shard.strip()) > 0:
			params.append(('shards', shard+"/solr"))  # '&shards=localhost:8982/solr'
		else:
			params.append(("distrib", "false"))

		url = "http://" + index_node + "/esg-search/search?" + urllib.urlencode(params)
		print 'Searching for files at URL: %s' % url
		jobj = getJson(url)

		# parse response for GridFTP URLs
		if jobj is not None:
			for doc in jobj['response']['docs']:
				access = {}
				for url in doc['url']:
					# example URLs:
					# 'http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|application/netcdf|HTTPServer'
					# 'http://esg-datanode.jpl.nasa.gov/thredds/dodsC/esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc.html|application/opendap-html|OPENDAP'
					# 'globus:8a3f3166-e9dc-11e5-97d6-22000b9da45e//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|Globus|Globus'
					# 'gsiftp://esg-datanode.jpl.nasa.gov:2811//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|application/gridftp|GridFTP'
					parts = url.split('|')
					access[parts[2].lower()] = parts[0]
				if 'globus' in access:
					m = re.match('globus:([^/]*)(.*)', access['globus'])
					if m:
						gendpoint_name = m.group(1)
						path = m.group(2)
						if gendpoint_name not in download_map:
							download_map[gendpoint_name] = []  # insert empty list of paths
						download_map[gendpoint_name].append(path)
				else:
					print 'The file is not accessible through Globus'
		else:
			return HttpResponseServerError("Error querying for files URL")
						
	# store map in session
	request.session[GLOBUS_DOWNLOAD_MAP] = download_map
	print 'Stored Globus Download Map=%s at session scope' % download_map
	
	# redirect after post (to display page)
	return HttpResponseRedirect( reverse('globus_transfer') )
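To make the URL parsing concrete, here is the Globus example URL from the comments above run through the same split and regex (a standalone sketch, independent of the view):

import re

# pipe-delimited triple: location | mime type | service name
url = 'globus:8a3f3166-e9dc-11e5-97d6-22000b9da45e//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|Globus|Globus'
location, mime_type, service = url.split('|')
m = re.match('globus:([^/]*)(.*)', location)
print(m.group(1))  # endpoint name: 8a3f3166-e9dc-11e5-97d6-22000b9da45e
print(m.group(2))  # path: //esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc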
Example #12
def download(request):
    '''
    View that initiates the Globus download workflow by collecting and optionally sub-selecting the GridFTP URLs to be downloaded.
    This view can be invoked via GET (link from search page, one dataset only) or POST (link from data cart page, multiple datasets at once).
    Example URL: http://localhost:8000/globus/download/
                 ?dataset=obs4MIPs.NASA-JPL.AIRS.mon.v1%[email protected],obs4MIPs.NASA-JPL.MLS.mon.v1%[email protected]
                 &method=web
    '''

    # retrieve request parameters
    datasets = getQueryDict(request).get('dataset', '').split(",")
    # optional query filter
    query = getQueryDict(request).get('query', None)
    # maximum number of files to query for, if specified
    limit = request.GET.get('limit', DOWNLOAD_LIMIT)

    # map of (data_node, list of GridFTP URLs to download)
    download_map = {}

    # loop over requested datasets
    for dataset in datasets:

        # query each index_node for all files belonging to that dataset
        (dataset_id, index_node) = str(dataset).split('@')

        params = [('type', "File"), ('dataset_id', dataset_id),
                  ('offset', '0'), ('limit', limit), ('fields', 'url'),
                  ("format", "application/solr+json")]

        if query is not None and len(query.strip()) > 0:
            params.append(('query', query))

        # optional shard
        shard = request.GET.get('shard', '')
        if shard is not None and len(shard.strip()) > 0:
            params.append(('shards', shard + "/solr"))  # '&shards=localhost:8982/solr'
        else:
            params.append(("distrib", "false"))

        url = "http://" + index_node + "/esg-search/search?" + urllib.urlencode(params)
        print 'Searching for files at URL: %s' % url
        jobj = getJson(url)

        # parse response for GridFTP URLs
        if jobj is not None:
            for doc in jobj['response']['docs']:
                access = {}
                for url in doc['url']:
                    # example URLs:
                    # 'http://esg-datanode.jpl.nasa.gov/thredds/fileServer/esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|application/netcdf|HTTPServer'
                    # 'http://esg-datanode.jpl.nasa.gov/thredds/dodsC/esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc.html|application/opendap-html|OPENDAP'
                    # 'globus:8a3f3166-e9dc-11e5-97d6-22000b9da45e//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|Globus|Globus'
                    # 'gsiftp://esg-datanode.jpl.nasa.gov:2811//esg_dataroot/obs4MIPs/observations/atmos/husNobs/mon/grid/NASA-JPL/AIRS/v20110608/husNobs_AIRS_L3_RetStd-v5_200209-201105.nc|application/gridftp|GridFTP'
                    parts = url.split('|')
                    access[parts[2].lower()] = parts[0]
                if 'globus' in access:
                    m = re.match('globus:([^/]*)(.*)', access['globus'])
                    if m:
                        gendpoint_name = m.group(1)
                        path = m.group(2)
                        if gendpoint_name not in download_map:
                            download_map[gendpoint_name] = []  # insert empty list of paths
                        download_map[gendpoint_name].append(path)
                else:
                    print 'The file is not accessible through Globus'
        else:
            return HttpResponseServerError("Error querying for files URL")

    # store map in session
    request.session[GLOBUS_DOWNLOAD_MAP] = download_map
    print 'Stored Globus Download Map=%s at session scope' % download_map

    # redirect after post (to display page)
    return HttpResponseRedirect(reverse('globus_transfer'))
Example #13
def get_all_shared_user_info(user, includeCurrentSite=True):
    """Queries all nodes (including local node) for projects and groups the user belongs to.
       Returns two lists of dictionaries but does NOT update the local database.
       Example of JSON data retrieved from each node:
      {
        "users": {
            "https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin": {
                "home_site_domain": "cog-esgf.esrl.noaa.gov", 
                "openid": "https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin", 
                "datacart": {
                    "size": 0
                }, 
                "home_site_name": "NOAA ESRL ESGF-CoG", 
                "groups": {
                    "HIWPP": {}, 
                    "NCPP DIP": {
                        "admin": true, 
                        "publisher": true, 
                        "super": true, 
                        "user": true
                    }, 
                    "NOAA ESRL": {
                        "super": true
                    }
                }, 
                "projects": {
                    "AlaskaSummerSchool": [
                        "admin", 
                        "user"
                    ], 
                    "CF-Grids": [
                        "admin"
                    ], 
                    "CFSS": [
                        "admin", 
                        "user"
                    ], 
                .....
    """

    # dictionary of information retrieved from each node, including current node
    userDict = {}  # node --> dictionary of user information

    try:
        if user.profile.openid() is not None:

            openid = user.profile.openid()
            print 'Retrieving projects, groups for user with openid=%s' % openid

            # loop over remote (enabled) nodes, possibly add current node
            sites = list(getPeerSites())
            if includeCurrentSite:
                sites = sites + [Site.objects.get_current()]

            for site in sites:

                url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
                print 'Retrieving user projects and groups from URL=%s' % url
                jobj = getJson(url)
                if jobj is not None and openid in jobj['users']:
                    userDict[site] = jobj['users'][openid]
                else:
                    print 'Openid=%s not found at site=%s' % (openid, site)

    except UserProfile.DoesNotExist:
        pass  # user profile not yet created

    # restructure information as list of (project object, user roles) and (group name, group roles) tuples
    projects = []
    groups = []
    for usite, udict in userDict.items():
        if udict.get('projects', None):
            for pname, proles in udict["projects"].items():
                try:
                    proj = Project.objects.get(short_name__iexact=pname)
                    projects.append((proj, proles))
                except ObjectDoesNotExist:
                    pass
        if udict.get('groups', None):
            for gname, gdict in udict["groups"].items():
                groles = []
                for grole, approved in gdict.items():
                    if approved:
                        groles.append(grole)
                groups.append((gname, groles))

    # return lists of (project, roles) and (group, roles) tuples
    return (projects, groups)
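A hedged sketch of consuming the return value (two parallel lists of tuples, as the docstring describes):

# hypothetical usage: list cross-node memberships for a user
projects, groups = get_all_shared_user_info(user)
for project, roles in projects:
    print('project=%s roles=%s' % (project.short_name, roles))
for group_name, group_roles in groups:
    print('group=%s roles=%s' % (group_name, group_roles))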
Example #14
def get_all_shared_user_info(user, includeCurrentSite=True):
    """Queries all nodes (including local node) for projects and groups the user belongs to.
       Returns two lists of dictionaries but does NOT update the local database.
       Example of JSON data retrieved from each node:
      {
        "users": {
            "https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin": {
                "home_site_domain": "cog-esgf.esrl.noaa.gov", 
                "openid": "https://hydra.fsl.noaa.gov/esgf-idp/openid/rootAdmin", 
                "datacart": {
                    "size": 0
                }, 
                "home_site_name": "NOAA ESRL ESGF-CoG", 
                "groups": {
                    "HIWPP": {}, 
                    "NCPP DIP": {
                        "admin": true, 
                        "publisher": true, 
                        "super": true, 
                        "user": true
                    }, 
                    "NOAA ESRL": {
                        "super": true
                    }
                }, 
                "projects": {
                    "AlaskaSummerSchool": [
                        "admin", 
                        "user"
                    ], 
                    "CF-Grids": [
                        "admin"
                    ], 
                    "CFSS": [
                        "admin", 
                        "user"
                    ], 
                .....
    """

    # dictionary of information retrieved from each node, including current node
    userDict = {}  # node --> dictionary of user information
    
    try:
        if user.profile.openid() is not None:
            
            openid = user.profile.openid()
            print 'Retrieving projects, groups for user with openid=%s' % openid
            
            # loop over remote (enabled) nodes, possibly add current node
            sites = list(getPeerSites())
            if includeCurrentSite:
                sites = sites + [Site.objects.get_current()]
            
            for site in sites:
                            
                url = "http://%s/share/user/?openid=%s" % (site.domain, openid)
                print 'Retrieving user projects and groups from URL=%s' % url
                jobj = getJson(url)
                if jobj is not None and openid in jobj['users']:
                    userDict[site] = jobj['users'][openid] 
                else:
                    print 'Openid=%s not found at site=%s' % (openid, site)
                                                            
    except UserProfile.DoesNotExist:
        pass  # user profile not yet created
    
    # restructure information as list of (project object, user roles) and (group name, group roles) tuples
    projects = []
    groups = []
    for usite, udict in userDict.items():
        if udict.get('projects', None):
            for pname, proles in udict["projects"].items():
                try:
                    proj = Project.objects.get(short_name__iexact=pname)
                    projects.append((proj, proles))
                except ObjectDoesNotExist:
                    pass
        if udict.get('groups', None):
            for gname, gdict in udict["groups"].items():
                groles = []
                for grole, approved in gdict.items():
                    if approved:
                        groles.append(grole)
                groups.append((gname, groles))

    # return lists of (project, roles) and (group, roles) tuples
    return (projects, groups)