Esempio n. 1
0
def authz(ident, permission=None):
    """Return the silos the identified user is allowed to access.

    ident is the identity mapping of the request; its 'user' entry must
    expose ``groups``, each group having ``silo`` and ``permissions``
    attributes (every permission carrying a ``permission_name``).

    permission is either a single permission name or a list of names.
    When it is empty, membership of any group of a silo is sufficient;
    otherwise the group must hold at least one of the named permissions.

    Returns an empty list when there is no usable identity, the complete
    silo list when the user belongs to a group whose silo is '*', and
    otherwise the matching silos, each listed once.
    """
    # NOTE: g._register_silos() is an expensive operation, so the silos are
    # listed from the database instead of from ag.granary.
    granary_list = list_silos()

    # Normalise permission to a list; a bare name becomes a one-item list.
    if not permission:
        permission = []
    elif not isinstance(permission, (list, tuple)):
        permission = [permission]

    silos = []
    if not ident or 'user' not in ident or not ident['user'] or not ident['user'].groups:
        return silos

    for group in ident['user'].groups:
        if group.silo == '*':
            # Wildcard membership grants every silo.
            return granary_list
        if group.silo not in granary_list or group.silo in silos:
            continue
        # any() stops at the first match, so a group holding several of the
        # requested permissions still contributes its silo only once.
        if not permission or any(p.permission_name in permission
                                 for p in group.permissions):
            silos.append(group.silo)
    return silos
Esempio n. 2
0
    def index(self):
        """List the silos visible to the caller as HTML or JSON.

        When ag.metadata_embargoed is set, an identity is required (401
        otherwise) and only the silos returned by ag.authz() are listed;
        otherwise every silo from list_silos() is listed.

        For each silo, c.silo_infos[silo] holds
        [title or silo name, dataset count, modified date].

        The response format follows the Accept header: text/html and
        text/xhtml render '/list_of_silos.html', text/plain and
        application/json return the silo names as JSON.  Anything else
        falls back to the HTML page.
        """
        ident = request.environ.get('repoze.who.identity')
        c.ident = ident
        if ag.metadata_embargoed:
            if not ident:
                abort(401, "Not Authorised")
            c.silos = ag.authz(ident)
        else:
            c.silos = list_silos()

        c.silo_infos = {}
        for silo in c.silos:
            state_info = ag.granary.describe_silo(silo)
            title = silo
            if 'title' in state_info and state_info['title']:
                title = state_info['title']
            c.silo_infos[silo] = [
                title,
                get_datasets_count(silo),
                getSiloModifiedDate(silo),
            ]

        # conneg return
        accept_list = None
        if 'HTTP_ACCEPT' in request.environ:
            try:
                accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
            except Exception:
                # An unparsable Accept header is treated as a request for HTML.
                accept_list = [MT("text", "html")]
        if not accept_list:
            accept_list = [MT("text", "html")]

        for mimetype in accept_list:
            wanted = str(mimetype).lower()
            if wanted in ["text/html", "text/xhtml"]:
                return render('/list_of_silos.html')
            if wanted in ["text/plain", "application/json"]:
                response.content_type = 'application/json; charset="UTF-8"'
                response.status_int = 200
                response.status = "200 OK"
                return simplejson.dumps(c.silos)
        # Whoops nothing satisfies - return text/html
        return render('/list_of_silos.html')
Esempio n. 3
0
def authz(ident, permission=None):
    """Return the silos the identified user is allowed to access.

    ident is the identity mapping of the request; its 'user' entry must
    expose ``groups``, each group having ``silo`` and ``permissions``
    attributes (every permission carrying a ``permission_name``).

    permission is either a single permission name or a list of names.
    When it is empty, membership of any group of a silo is sufficient;
    otherwise the group must hold at least one of the named permissions.

    Returns an empty list when there is no usable identity (ident is
    empty, has no 'user', or the user has no groups), the complete silo
    list when the user belongs to a group whose silo is '*', and otherwise
    the matching silos, each listed once.
    """
    # NOTE: g._register_silos() is an expensive operation, so the silos are
    # listed from the database instead of from ag.granary.
    granary_list = list_silos()

    # Normalise permission to a list; a bare name becomes a one-item list.
    if not permission:
        permission = []
    elif not isinstance(permission, (list, tuple)):
        permission = [permission]

    silos = []
    # Anonymous requests reach this function with ident set to None, so a
    # missing identity yields "no silos" rather than an exception.
    if not ident or 'user' not in ident or not ident['user'] or not ident['user'].groups:
        return silos

    for group in ident['user'].groups:
        if group.silo == '*':
            # Wildcard membership grants every silo.
            return granary_list
        if group.silo not in granary_list or group.silo in silos:
            continue
        # any() stops at the first match, so a group holding several of the
        # requested permissions still contributes its silo only once.
        if not permission or any(p.permission_name in permission
                                 for p in group.permissions):
            silos.append(group.silo)
    return silos
 def _get_authorised_rdf_silo(self, silo):
     """Return the RDF silo object for silo if the caller may use it.

     Raises SwordError with status 404 when the granary does not know the
     silo, and 403 when the silo exists in the database listing but is not
     among the silos ag.authz() grants to self.auth_credentials.identity.
     """
     if not ag.granary.issilo(silo):
         # Raise like the other failure paths, so callers never receive a
         # SwordError instance in place of a silo object.
         raise SwordError(status=404, empty=True)

     # get the authorised list of silos
     silos = ag.authz(self.auth_credentials.identity)

     # does the collection/silo exist?  If not, we can't do a deposit
     if silo not in silos:
         # if it's not in the authorised silos it is either non-existent
         # or it is forbidden...
         if silo in list_silos():
             # forbidden
             raise SwordError(status=403, empty=True)
         # not found
         raise SwordError(status=404, empty=True)

     # get a full silo object
     return ag.granary.get_rdf_silo(silo)
Esempio n. 5
0
    def _get_authorised_rdf_silo(self, silo):
        """Return the RDF silo object for silo if the caller may use it.

        Raises SwordError with status 404 when the granary does not know
        the silo, and 403 when the silo exists in the database listing but
        is not among the silos ag.authz() grants to
        self.auth_credentials.identity.
        """
        if not ag.granary.issilo(silo):
            # Raise like the other failure paths, so callers never receive
            # a SwordError instance in place of a silo object.
            raise SwordError(status=404, empty=True)

        # get the authorised list of silos
        silos = ag.authz(self.auth_credentials.identity)

        # does the collection/silo exist?  If not, we can't do a deposit
        if silo not in silos:
            # if it's not in the authorised silos it is either
            # non-existent or it is forbidden...
            if silo in list_silos():
                # forbidden
                raise SwordError(status=403, empty=True)
            # not found
            raise SwordError(status=404, empty=True)

        # get a full silo object
        return ag.granary.get_rdf_silo(silo)
Esempio n. 6
0
def sync_members(g):
    """Reconcile silo membership between granary metadata and the database.

    g is the granary object.  For every silo in g.silos:

    * the silo is added to the database when list_silos() did not report it;
    * administrators, managers and submitters named in the silo metadata
      (comma-separated strings) are kept only if they are known usernames;
    * for silos already in the database, those lists are merged with the
      members returned by list_group_usernames(silo);
    * members present in the metadata but not in the database are added
      with add_group_users();
    * the de-duplicated lists are written back to the silo metadata, with
      'owners' set to the union of the three roles.

    Finally g.sync() is called.  Returns None.
    """
    # NOTE: g._register_silos() IS AN EXPENSIVE OPERATION.
    # THIS FUNCTION IS EXPENSIVE AND SHOULD BE CALLED ONLY IF REALLY NECESSARY
    g.state.revert()
    g._register_silos()
    granary_list = g.silos

    granary_list_database = list_silos()
    usernames = list_usernames()

    def metadata_users(kw, key):
        # Names stored comma-separated under kw[key], limited to valid users.
        if key not in kw or not kw[key]:
            return []
        names = [x.strip() for x in kw[key].split(",") if x]
        return [x for x in names if x in usernames]

    for silo in granary_list:
        in_database = silo in granary_list_database
        if not in_database:
            add_silo(silo)
        kw = g.describe_silo(silo)

        # Get existing admins, managers and submitters from silo metadata.
        # NOTE(review): kw['owners'] is never read, so owners is always
        # rebuilt from the three roles below - confirm this is intended.
        admins = metadata_users(kw, 'administrators')
        managers = metadata_users(kw, 'managers')
        submitters = metadata_users(kw, 'submitters')

        # Synchronize members in silo metadata with users in database.
        # Silos that were only just added have no database members yet.
        d_admins, d_managers, d_submitters = [], [], []
        if in_database:
            d_admins, d_managers, d_submitters = list_group_usernames(silo)
            admins.extend(d_admins)
            managers.extend(d_managers)
            submitters.extend(d_submitters)

        # Ensure users are listed just once and owners is the superset
        admins = list(set(admins))
        managers = list(set(managers))
        submitters = list(set(submitters))
        owners = list(set(admins) | set(managers) | set(submitters))

        # Add users in silo metadata to the database
        new_silo_users = []
        for role, members, known in (('administrator', admins, d_admins),
                                     ('manager', managers, d_managers),
                                     ('submitter', submitters, d_submitters)):
            new_silo_users.extend((a, role) for a in members if a not in known)
        if new_silo_users:
            add_group_users(silo, new_silo_users)

        # Write members into silo
        kw['owners'] = ','.join(owners)
        kw['administrators'] = ','.join(admins)
        kw['managers'] = ','.join(managers)
        kw['submitters'] = ','.join(submitters)
        g.describe_silo(silo, **kw)

    g.sync()
    return
Esempio n. 7
0
    def datasetview(self, silo, id):
        """Show (GET) or register (POST) the Datacite DOI of a data package version.

        silo is the silo name and id the data package identifier; the
        version is taken from the 'version' request parameter and defaults
        to the item's current version.

        Aborts with 404 when the silo is not in list_silos(), the item does
        not exist in the silo, or the requested version is not in the item
        manifest; with 403 when ag.metadata_embargoed is set.  For any
        method other than GET it also aborts with 401 without an identity
        and with 403 unless the caller has access to the silo and is the
        item creator, an administrator or a manager of the silo.

        GET collects the DOIs recorded for the item in c.dois, fetches the
        metadata of the selected version's DOI from Datacite and answers
        according to the Accept header ('/doiview.html', plain text or XML).

        POST generates a DOI when the version has none, builds the XML
        metadata with get_doi_metadata(), posts the DOI and the metadata to
        Datacite, records the DOI as a bibo:doi triple on the item and
        answers according to the Accept header.

        Methods other than GET and POST fall off the end of the function
        and return None.
        """
        c.silo_name = silo
        c.id = id

        http_method = request.environ['REQUEST_METHOD']

        granary_list = list_silos()
        if not silo in granary_list:
            abort(404)

        c_silo = ag.granary.get_rdf_silo(silo)
        if not c_silo.exists(id):
            abort(404)

        if ag.metadata_embargoed:
            abort(403, "DOIs cannot be issued to data packages whose metadata ia also under embargo")

        ident = request.environ.get('repoze.who.identity')  
        c.ident = ident

        item = c_silo.get_item(id)

        # The creator is read from the item's manifest state, when recorded.
        creator = None
        if item.manifest and item.manifest.state and 'metadata' in item.manifest.state and item.manifest.state['metadata'] and \
            'createdby' in item.manifest.state['metadata'] and item.manifest.state['metadata']['createdby']:
            creator = item.manifest.state['metadata']['createdby']

        c.version = item.currentversion
        c.version_doi = None
        c.editor = False

        #Get version number
        vnum = request.params.get('version', '') or ""
        if vnum:
            vnum = str(vnum)
            if not vnum in item.manifest['versions']:
                # NOTE(review): the message formats c.silo_name where the data
                # package id (c.id) is presumably intended - confirm.
                abort(404, "Version %s of data package %s not found"%(vnum, c.silo_name))
            c.version = vnum

        if not (http_method == "GET"):
            #identity management of item 
            if not request.environ.get('repoze.who.identity'):
                abort(401, "Not Authorised")
            silos = ag.authz(ident)      
            if silo not in silos:
                abort(403, "Forbidden")
            silos_admin = ag.authz(ident, permission='administrator')
            silos_manager = ag.authz(ident, permission='manager')
            #if not (ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]):
            if not (ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager):
                abort(403, "Forbidden")
        elif http_method == "GET":
            # ident may be None here (anonymous GET); ag.authz() is called
            # with it regardless.
            silos_admin = ag.authz(ident, permission='administrator')
            silos_manager = ag.authz(ident, permission='manager')
            #if ident['repoze.who.userid'] == creator or ident.get('role') in ["admin", "manager"]:
            # Operator precedence: (ident and userid == creator) or admin or manager.
            if ident and ident['repoze.who.userid'] == creator or silo in silos_admin or silo in silos_manager:
                c.editor = True

        # Look up the DOI already recorded against this version's URI, if any.
        version_uri = "%s?version=%s"%(item.uri.rstrip('/'), c.version)
        c.version_doi = item.list_rdf_objects(URIRef(version_uri), u"http://purl.org/ontology/bibo/doi")
        if not c.version_doi or not c.version_doi[0]:
            c.version_doi = None
        else:
            c.version_doi = c.version_doi[0]

        # HTTPS client for the Datacite endpoint, using the configured account.
        doi_conf = OxDataciteDoi()
        doi_api = HTTPRequest(endpointhost=doi_conf.endpoint_host, secure=True)
        doi_api.setRequestUserPass(endpointuser=doi_conf.account, endpointpass=doi_conf.password)

        # conneg: an unparsable or missing Accept header defaults to text/html
        accept_list = None
        if 'HTTP_ACCEPT' in request.environ:
            try:
                accept_list = conneg_parse(request.environ['HTTP_ACCEPT'])
            except:
                accept_list= [MT("text", "html")]
        if not accept_list:
            accept_list= [MT("text", "html")]

        c.message = None
        c.resp_status = None
        c.resp_reason = None
        c.metadata = None

        if http_method == "GET":
            #Get a list of all dois registered for this data package
            c.dois = {}
            for v in item.manifest['versions']:
                doi_ans = None
                # NOTE(review): this subject URI uses "/version<v>" whereas
                # version_uri above uses "?version=<v>", and the POST branch
                # stores the DOI under version_uri - confirm which is intended.
                doi_ans = item.list_rdf_objects(URIRef("%s/version%s"%(item.uri.rstrip('/'), v)), u"http://purl.org/ontology/bibo/doi")
                if doi_ans and doi_ans[0]:
                    c.dois[v] = doi_ans[0]

            c.heading = "Doi metadata information from Datacite"
            if not c.version_doi:
                # No DOI for this version: render the page without querying Datacite.
                mimetype = accept_list.pop(0)
                while(mimetype):
                    if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                        #Doing this to avoid displaying the error page!!!
                        response.status_int = 200
                        response.status = "200 OK"
                        c.metadata = None
                        return render('/doiview.html')
                    try:
                        mimetype = accept_list.pop(0)
                    except IndexError:
                        mimetype = None
                c.message = 'DOI not registered for version %s of data package %s'%(c.version, c.silo_name)
                return render('/doiview.html')

            # Fetch the metadata registered for this DOI from Datacite.
            resource = "%s?doi=%s"%(doi_conf.endpoint_path_metadata, c.version_doi)
            (resp, respdata) = doi_api.doHTTP_GET(resource=resource, expect_type='application/xml')
            c.resp_reason = resp.reason
            c.resp_status = resp.status
            if resp.status < 200 or resp.status >= 300:
                # Any non-2xx answer from Datacite is reported to the client as 400.
                response.status_int = 400
                response.status = "400 Bad Request"
                response_msg = ''
                c.metadata = ''
                if resp.status == 403:
                    #TODO: Confirm 403 is not due to authorization
                    msg = "403 Forbidden - login error with Datacite or data package belongs to another party at Datacite."
                elif resp.status == 404:
                    msg = "404 Not Found - DOI does not exist in DatCite's database"
                elif resp.status == 410:
                    msg = "410 Gone - the requested data package was marked inactive (using DELETE method) at Datacite"
                elif resp.status == 500:
                    msg = "500 Internal Server Error - Error retreiving the metadata from Datacite."
                else:
                    msg = "Error retreiving the metadata from Datacite. %s"%str(resp.status)
                c.message = msg
            else:
                response.status_int = 200
                response.status = "200 OK"
                c.metadata = respdata
                response_msg = respdata
            # conneg:
            mimetype = accept_list.pop(0)
            while(mimetype):
                if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                    #Setting headers to 200 to avoid displaying the error page!!!
                    response.status_int = 200
                    response.status = "200 OK"
                    return render('/doiview.html')
                elif str(mimetype).lower() in ["text/plain", "application/json"]:
                    # The raw Datacite response body is returned, also on error.
                    response.content_type = 'text/plain; charset="UTF-8"'
                    return str(respdata.decode('utf-8'))
                elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
                    response.status_int = 200
                    response.status = "200 OK"
                    response.content_type = 'text/xml; charset="UTF-8"'
                    return response_msg
                try:
                    mimetype = accept_list.pop(0)
                except IndexError:
                    mimetype = None
            #Whoops - nothing satisfies - default to text/html
            #Setting headers to 200 to avoid displaying the error page!!!
            response.status_int = 200
            response.status = "200 OK"
            return render('/doiview.html')

        if http_method == "POST":
            item.set_version_cursor(c.version)
            #1a. If doi does not exist for this version, generate doi
            register_doi = False
            if not c.version_doi:
                cnt = doi_count()
                if not cnt:
                    abort(400, "Error generating DOI")
                register_doi = True
                tiny_pid = short_pid.encode_url(cnt) 
                c.version_doi = "%s/bodleian%s.%s"%(doi_conf.prefix, tiny_pid, c.version)
            #1b. Construct XML metadata
            # xml_metadata is only assigned inside this branch; c.version_doi is
            # either pre-existing or was just generated above.
            if c.version_doi:
                xml_metadata = get_doi_metadata(c.version_doi, item)
                c.metadata = xml_metadata
            #FOR TEST PURPOSES ONLY
            #xml_metadata = False
            # Have commented out the sections below as we will only register a DOI if there is metadata
            """
            if not xml_metadata and not register_doi:
                #2a. If the doi already exists and there is no xml metadata to update, return bad request
                c.message = "Coud not update metadata"
                response.status_int = 400
                response.status = "Bad request"
                respdata = c.message
                c.metadata = ''
            elif not xml_metadata and register_doi:
                #2b. If the doi is not registered, but there is no xml metadata to update, register just the doi with datacite
                c.heading = "Registering new DOI with Datacite"
                resource = "%s"%doi_conf.endpoint_path_doi
                body = "%s\n%s"%(c.version_doi, version_uri)
                #body_unicode = unicode(body, "utf-8")
                body_unicode = unicode(body)
                (resp, respdata) = doi_api.doHTTP_POST(body_unicode, resource=resource, data_type='text/plain;charset=UTF-8')
                c.resp_reason = resp.reason
                c.resp_status = resp.status
                if resp.status < 200 or resp.status >= 300:
                    response.status_int = 400
                    response.status = "400 Bad Request"
                    response_msg = "DOI not registered"
                    c.metadata = ''
                    if resp.status == 400:
                        msg = "400 Bad Request - Request body must be exactly two lines: DOI and URL"
                    elif resp.status == 403:
                        #TODO: Confirm 403 is not due to authorization
                        msg = "403 Forbidden - From Datacite: login problem, quota excceded, wrong domain, wrong prefix"
                    elif resp.status == 500:
                        msg = "500 Internal Server Error - Error registering the DOI."
                    else:
                        msg = "Error retreiving the metadata from Datacite. %s"%str(resp.status)
                    c.message = msg
                else:
                    #3. Add the DOI to the rdf metadata
                    item.add_namespace('bibo', "http://purl.org/ontology/bibo/")
                    item.add_triple(URIRef(version_uri), u"bibo:doi", Literal(c.version_doi))
                    item.del_triple(item.uri, u"dcterms:modified")
                    item.add_triple(item.uri, u"dcterms:modified", datetime.now())
                    item.sync()
                    response.status_int = 200
                    response.status = "200 OK"
                    response_msg = "DOI Registered. %s"%respdata
                    c.metadata = ''
                    c.message = "201 Created - DOI registered. %s"%respdata
            """
            if not xml_metadata:
                #2a. No xml metadata could be generated: return bad request
                c.message = "Could not generate metadata"
                c.metadata = ''
                abort(400, "Error generating metadata")
            elif not register_doi:
                # register_doi is False when the version already had a DOI, so a
                # POST for such a version ends here with 400.
                # NOTE(review): confirm that updating an existing DOI is meant
                # to be rejected.
                c.message = "Could not generate DOI"
                abort(400, "Error generating DOI")
            else:
                #register the DOI with Datacite
                resource = "%s"%doi_conf.endpoint_path_doi
                body = "doi=%s\nurl=%s"%(c.version_doi, version_uri)
                body_unicode = unicode(body)
                (resp, respdata) = doi_api.doHTTP_POST(body_unicode, resource=resource, data_type='text/plain;charset=UTF-8')
                c.resp_reason1 = resp.reason
                c.resp_status1 = resp.status
                #Add the metadata within Datacite
                c.heading = "Registering new DOI along with its metadata with Datacite"
                #body_unicode = unicode(xml_metadata, "utf-8")
                #body_unicode = unicode(xml_metadata)
                body_unicode = xml_metadata
                resource = "%s?doi=%s&url=%s"%(doi_conf.endpoint_path_metadata, c.version_doi, version_uri)
                # resp/respdata now refer to the metadata POST; the DOI POST
                # status is kept in c.resp_status1.
                (resp, respdata) = doi_api.doHTTP_POST(body_unicode, resource=resource, data_type='application/xml;charset=UTF-8')
                c.resp_reason = resp.reason
                c.resp_status = resp.status

                # The DOI is recorded on the item when EITHER request succeeded.
                # NOTE(review): confirm "or" rather than "and" is intended.
                if (c.resp_status1>= 200 and c.resp_status1 < 300) or (resp.status >= 200 and resp.status < 300):
                    #3. Add the DOI to the rdf metadata
                    item.add_namespace('bibo', "http://purl.org/ontology/bibo/")
                    item.add_triple(URIRef(version_uri), u"bibo:doi", Literal(c.version_doi))
                    item.del_triple(item.uri, u"dcterms:modified")
                    item.add_triple(item.uri, u"dcterms:modified", datetime.now())
                    item.sync()
                    #4. Broadcast changes to redis (best effort: failures are ignored)
                    try:
                        ag.b.change(silo, id, ident=ident['repoze.who.userid'])
                    except:
                        pass

                # The client-facing status depends on the metadata POST only.
                if resp.status < 200 or resp.status >= 300:
                    response.status_int = 400
                    response.status = "400 Bad Request"
                    response_msg = "DOI and metadata not registered"
                    c.metadata = body_unicode
                    if resp.status == 400:
                        msg = "400 Bad Request - Invalid XML metadata"
                    elif resp.status == 403:
                        #TODO: Confirm 403 is not due to authorization
                        msg = "403 Forbidden - From Datacite: login problem, quota excceded, wrong domain, wrong prefix"
                    elif resp.status == 500:
                        msg = "500 Internal Server Error - Error registering the DOI."
                    else:
                        msg = "Error retreiving the metadata from Datacite. %s"%str(resp.status)
                    c.message = msg
                else:
                    response.status_int = 200
                    response.status = "200 OK"
                    response_msg = body_unicode
                    c.metadata = body_unicode
                    c.message = "201 Created - DOI registered. %s"%respdata
            # conneg:
            mimetype = accept_list.pop(0)
            while(mimetype):
                if str(mimetype).lower() in ["text/html", "text/xhtml"]:
                    #Setting headers to 200 to avoid displaying the error page!!!
                    response.status_int = 200
                    response.status = "200 OK"
                    return render('/doiview.html')
                elif str(mimetype).lower() in ["text/plain", "application/json"]:
                    # Returns the body of the Datacite metadata POST response.
                    response.content_type = 'text/plain'
                    return str(respdata.decode('utf-8'))
                elif str(mimetype).lower() in ["application/rdf+xml", "text/xml"]:
                    response.status_int = 200
                    response.status = "200 OK"
                    response.content_type = 'text/xml; charset="UTF-8"'
                    return response_msg
                try:
                    mimetype = accept_list.pop(0)
                except IndexError:
                    mimetype = None
            #Whoops - nothing satisfies - default to text/html
            #Setting headers to 200 to avoid displaying the error page!!!
            response.status_int = 200
            response.status = "200 OK"
            return render('/doiview.html')
Esempio n. 8
0
def sync_members(g):
    """Reconcile silo membership between granary metadata and the database.

    g is the granary object.  For every silo in g.silos:

    * the silo is added to the database when list_silos() did not report it;
    * administrators, managers and submitters named in the silo metadata
      (comma-separated strings) are kept only if they are known usernames;
    * for silos already in the database, those lists are merged with the
      members returned by list_group_usernames(silo);
    * members present in the metadata but not in the database are added
      with add_group_users();
    * the de-duplicated lists are written back to the silo metadata, with
      'owners' set to the union of the three roles.

    Finally g.sync() is called.  Returns None.
    """
    # NOTE: g._register_silos() IS AN EXPENSIVE OPERATION.
    # THIS FUNCTION IS EXPENSIVE AND SHOULD BE CALLED ONLY IF REALLY NECESSARY
    g.state.revert()
    g._register_silos()
    granary_list = g.silos

    granary_list_database = list_silos()
    usernames = list_usernames()

    def metadata_users(kw, key):
        # Names stored comma-separated under kw[key], limited to valid users.
        if key not in kw or not kw[key]:
            return []
        names = [x.strip() for x in kw[key].split(",") if x]
        return [x for x in names if x in usernames]

    for silo in granary_list:
        in_database = silo in granary_list_database
        if not in_database:
            add_silo(silo)
        kw = g.describe_silo(silo)

        # Get existing admins, managers and submitters from silo metadata.
        # NOTE(review): kw['owners'] is never read, so owners is always
        # rebuilt from the three roles below - confirm this is intended.
        admins = metadata_users(kw, 'administrators')
        managers = metadata_users(kw, 'managers')
        submitters = metadata_users(kw, 'submitters')

        # Synchronize members in silo metadata with users in database.
        # Silos that were only just added have no database members yet.
        d_admins, d_managers, d_submitters = [], [], []
        if in_database:
            d_admins, d_managers, d_submitters = list_group_usernames(silo)
            admins.extend(d_admins)
            managers.extend(d_managers)
            submitters.extend(d_submitters)

        # Ensure users are listed just once and owners is the superset
        admins = list(set(admins))
        managers = list(set(managers))
        submitters = list(set(submitters))
        owners = list(set(admins) | set(managers) | set(submitters))

        # Add users in silo metadata to the database
        new_silo_users = []
        for role, members, known in (('administrator', admins, d_admins),
                                     ('manager', managers, d_managers),
                                     ('submitter', submitters, d_submitters)):
            new_silo_users.extend((a, role) for a in members if a not in known)
        if new_silo_users:
            add_group_users(silo, new_silo_users)

        # Write members into silo
        kw['owners'] = ','.join(owners)
        kw['administrators'] = ','.join(admins)
        kw['managers'] = ','.join(managers)
        kw['submitters'] = ','.join(submitters)
        g.describe_silo(silo, **kw)

    g.sync()
    return