예제 #1
0
 def test_experiment(self):
     """Save a minimal Experiment and check its fields, URL and directory."""
     experiment = Experiment(
         title='test exp1',
         institution_name='monash',
         created_by=self.user,
     )
     experiment.save()

     # (attribute name, expected value) pairs, checked in order.
     expected_fields = [
         ('title', 'test exp1'),
         ('url', None),
         ('institution_name', 'monash'),
         ('approved', False),
         ('handle', None),
         ('created_by', self.user),
         ('public_access', Experiment.PUBLIC_ACCESS_NONE),
     ]
     for field_name, expected_value in expected_fields:
         self.assertEqual(getattr(experiment, field_name), expected_value)

     # The expected URL is built from the id of the first stored experiment.
     first_id = Experiment.objects.first().id
     expected_url = '/experiment/view/%d/' % first_id
     failure_note = (experiment.get_absolute_url()
                     + ' != /experiment/view/%d/' % first_id)
     self.assertEqual(experiment.get_absolute_url(), expected_url,
                      failure_note)

     # The experiment directory lives under FILE_STORE_PATH, named by id.
     store_dir = os.path.join(settings.FILE_STORE_PATH, str(experiment.id))
     self.assertEqual(experiment.get_or_create_directory(), store_dir)
예제 #2
0
def register_experiment_ws_xmldata(request):
    """Register an experiment from an uploaded METS/XML file.

    Web-service mechanism for registering an experiment, and triggering a
    corresponding file transfer.  Although intended to be called as a web
    service, it actually works fine as a normal form, at
    /experiment/register

    Progress is published through the module globals ``experiment``,
    ``idless_statuses``, ``current_action`` and ``debug_POST``.
    NOTE(review): these are presumably consumed by ``add_status`` and the
    response helpers -- confirm.  Module-level state is shared between
    concurrent requests.

    :param request: the incoming HTTP request
    :returns: the result of ``send_retry_response`` for a non-POST request
        or an invalid form; the result of ``return_response_error`` on
        authentication failure, METS ingest failure or any unhandled
        exception; otherwise an ``HttpResponse`` (status 200) whose body is
        the new experiment id and whose ``Location`` header points at the
        experiment view.
    """
    global experiment, idless_statuses, current_action, debug_POST
    experiment = None
    idless_statuses = []
    logger.debug("Starting ingest process")
    try:
        # Check that we have received a form, abort otherwise
        if request.method != 'POST':
            # Happens when just viewing the form
            form = RegisterExperimentForm()  # An unbound form
            return send_retry_response(request, form, '')

        logger.info("Starting experiment ingest processing")

        from datetime import datetime

        temp_title = "Ingest Received: " + \
                     datetime.now().strftime("%A, %d. %B %Y %I:%M%p")

        # A form bound to the POST data
        form = RegisterExperimentForm(request.POST, request.FILES)

        # Check that the form is filled out, abort otherwise.
        if not form.is_valid():
            fail_message = ("Form validation failure: <br/>"
                            "Form Errors: " + str(form.errors) + "<br/>")
            try:
                add_status(RegistrationStatus.ERROR, fail_message)
                return send_retry_response(request, form, '')
            except Exception as ex:
                logger.error("Really an exception %s" % ex)
                # Never continue the ingest with an invalid form.
                return return_response_error(request)

        logger.debug("Form validation: ok")
        xmldata = request.FILES['xmldata']
        xmldata_meta = xmldata.name
        username = form.cleaned_data['username']
        originid = form.cleaned_data['originid']
        from_url = form.cleaned_data['from_url']
        owners = request.POST.getlist('experiment_owner')

        # Summary of the submitted values, kept in a module global.
        debug_POST = ("username: " + username + "<br/>"
                      "xmldata: " + xmldata_meta + "<br/>"
                      "originid: " + originid + "<br/>"
                      "from_url: " + from_url + "<br/>")

        user = auth_service.authenticate(request=request,
                                         authMethod=localdb_auth_key)

        # Check user is authenticated, and user information is present,
        # abort otherwise
        if not authentication_ok(user):
            return return_response_error(request)
        logger.debug("User authentication: ok")

        # Basic checks have passed, so create the experiment.
        experiment = Experiment(title=temp_title, approved=True,
                                created_by=user)
        experiment.save()

        # Now update old registration statuses with the new experiment number.
        for oldstatus in idless_statuses:
            rs = RegistrationStatus.objects.get(pk=oldstatus)
            rs.experiment = experiment
            rs.save()

        # If no owner provided, record a warning.
        check_owner(owners)

        # Write the submitted XML file to disk; the context manager closes
        # the file even if writing a chunk fails.
        filename = path.join(experiment.get_or_create_directory(),
                             'mets_upload.xml')
        with open(filename, 'wb+') as f:
            for chunk in xmldata.chunks():
                f.write(chunk)

        add_status(status=RegistrationStatus.PASS,
                   message="Ingest Successfully Received")

        # Now process METS/XML file
        current_action = "Ingest Processing"
        try:
            _registerExperimentDocument(filename=filename,
                                        created_by=user,
                                        expid=experiment.id,
                                        owners=owners,
                                        username=username)
        except Exception:
            add_status(status=RegistrationStatus.ERROR,
                       message="METS metadata ingest failed",
                       exception=True)
            return return_response_error(request)

        add_status(status=RegistrationStatus.PASS,
                   message="Ingest Successfully Processed")

        if from_url:
            # form is ok, METS file ingested ok, and they also specified a
            # file to transfer
            current_action = 'File Transfer Request'
            logger.debug("transferring file")
            file_transfer_url = from_url + '/file_transfer/'
            do_file_transfer(file_transfer_url, originid, request)

        # Success: respond with just the ID of the newly created and
        # processed experiment.
        response = HttpResponse(str(experiment.id), status=200)
        response['Location'] = request.build_absolute_uri(
            '/experiment/view/' + str(experiment.id))
        return response
    except Exception:
        add_status(RegistrationStatus.ERROR,
                   "Unhandled exception in METS ingest.", exception=True)
        # A view must always return a response object, never None.
        return return_response_error(request)
def transfer_experiment(source):
    """
    Pull public experiments from source into current mytardis.

    The remote repository is first asked to identify itself over OAI-PMH and
    the base URL it reports must match ``source``.  For every listed record
    the public-access state, owner ACLs, METS document and experiment key are
    fetched; unless a local experiment already carries the same key, a
    placeholder experiment is created and populated from the METS document.

    :param source: base URL of the remote repository; it is compared with
        ``scheme://netloc`` of the base URL the remote reports for itself
    :returns: list of ids of the local experiments that were created.
        ``None`` is returned instead when the source has no matching
        records, when an experiment key is missing or empty, when the lock
        is already held, or when a duplicate experiment is found; ids
        collected before that point are not returned.
    :raises ReposReadError: the identity, ACL list or METS document could
        not be read from the source
    :raises OAIPMHError: the OAI-PMH client reported an error
    :raises BadAccessError: the source reports a different name, an
        experiment is not public, a reply could not be fetched/parsed, or
        the key schema / parameter name is missing locally
    :raises MetsParseError: ingest of the METS document failed
    """

    #TODO: Cleanup error messages
    #TODO: does not transfer licences as not part of METS format.
    #NOTE: As this is a pull we trust the data from the other tardis
    # Check identity of the feed
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The lock id consists of the task name and the MD5 digest of a fixed
    # token, so one lock is shared by every source.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5

    def acquire_lock():
        # cache.add fails if the key already exists
        return cache.add(lock_id, "true", LOCK_EXPIRE)

    def release_lock():
        # memcache delete is very slow, but we have to use it to take
        # advantage of using add() for atomic locking
        return cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source

    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)

    # Get list of public experiments at sources
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(
        source + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc",
        registry)
    try:
        exps_metadata = [
            meta for (header, meta, extra)
            in client.listRecords(metadataPrefix='oai_dc')
        ]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warning(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]

        found_user = _get_or_create_user(source, user)

        # make sure experiment is publicish
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/" %
                             (source, exp_id))
        except HTTPError:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if exp_state not in (Experiment.PUBLIC_ACCESS_FULL,
                             Experiment.PUBLIC_ACCESS_METADATA):
            # The message needs a placeholder: formatting a string without
            # one raised TypeError instead of the intended BadAccessError.
            msg = 'cannot ingest private experiment %s' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the experiment
        try:
            xmldata = getURL("%s/apps/reposproducer/acls/%s/" %
                             (source, exp_id))
        except HTTPError:
            msg = "Cannot get acl list of experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)
        try:
            acls = json.loads(xmldata)
        except ValueError:
            msg = "cannot parse acl list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        owners = []
        for acl in acls:
            # FIXME: skips all other types of acl for now
            if acl['pluginId'] == 'django_user' and acl['isOwner']:
                user = _get_or_create_user(source, acl['entityId'])
                owners.append(user.username)

        # Get the METS for the experiment
        try:
            metsxml = getURL("%s/experiment/metsexport/%s/?force_http_urls" %
                             (source, exp_id))
        except HTTPError:
            msg = "cannot get METS for experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)

        # load schema and parametername for experiment keys
        try:
            key_schema = Schema.objects.get(namespace=settings.KEY_NAMESPACE)
        except Schema.DoesNotExist:
            msg = "No ExperimentKeyService Schema found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            key_name = ParameterName.objects.get(name=settings.KEY_NAME)
        except ParameterName.DoesNotExist:
            msg = "No ExperimentKeyService ParameterName found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            xmldata = getURL("%s/apps/reposproducer/key/%s/" %
                             (source, exp_id))
        except HTTPError:
            msg = "cannot get key of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not xmldata:
            logger.warning(
                "Unable to retrieve experiment %s key.  Will try again later" %
                exp_id)
            return

        try:
            key_value = json.loads(xmldata)
        except ValueError:
            msg = "cannot parse key list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not key_value:
            logger.warning(
                "Unable to retrieve experiment %s key value.  Will try again later"
                % exp_id)
            return

        logger.debug("retrieved key %s from experiment %s" %
                     (key_value, exp_id))
        exps = Experiment.objects.all()

        if not acquire_lock():
            logger.warning("another worker has access to consume experiment")
            return

        # The duplicate check and the creation of the placeholder happen
        # under the lock; try/finally guarantees the lock is released even
        # if a query or save() raises.
        try:
            duplicate_exp = 0
            for exp in exps:
                params = ExperimentParameter.objects.filter(
                    name=key_name,
                    parameterset__schema=key_schema,
                    parameterset__experiment=exp)
                if params.count() >= 1:
                    key = params[0].string_value
                    if key == key_value:
                        duplicate_exp = exp.id
                        break

            if duplicate_exp:
                logger.warning(
                    "Found duplicate experiment form %s exp %s to  exp %s" %
                    (source, exp_id, duplicate_exp))
                return

            # TODO: Need someway of updating and existing experiment.
            # Problem is that copy will have different id from original, so
            # need unique identifier to allow matching

            # We have now pulled everything we need from producer and are
            # ready to create experiment.

            # Make placeholder experiment and ready metadata
            placeholder = Experiment(
                title='Placeholder Title',
                approved=True,
                created_by=found_user,
                public_access=exp_state,
                locked=False  # so experiment can then be altered.
            )
            placeholder.save()
            # NOTE(review): the key itself is not stored here (that code was
            # disabled); presumably the METS ingest below stores it --
            # confirm.
        finally:
            release_lock()

        local_id = placeholder.id
        filename = path.join(placeholder.get_or_create_directory(),
                             'mets_upload.xml')
        with open(filename, 'wb+') as f:
            f.write(metsxml)

        # Ingest this experiment META data and isOwner ACLS
        try:
            eid, sync_path = _registerExperimentDocument(filename=filename,
                                                         created_by=found_user,
                                                         expid=local_id,
                                                         owners=owners)
        except Exception:
            # FIXME: what errors can mets return?
            # FIXME: if METS parse fails then we should go back and delete
            # the placeholder experiment
            msg = '=== processing experiment %s: FAILED!' % local_id
            # logger.exception keeps the traceback of the original failure.
            logger.exception(msg)
            raise MetsParseError(msg)
        logger.info('=== processing experiment %s: DONE' % local_id)

        exp = Experiment.objects.get(id=eid)

        # so that tardis does not copy the data
        for datafile in exp.get_datafiles():
            datafile.stay_remote = True
            datafile.save()

        # FIXME: reverse lookup of URLs seem quite slow.
        # TODO: put this information into specific metadata schema attached
        # to experiment
        exp.description += get_audit_message(source, exp_id)
        exp.save()

        local_ids.append(local_id)
    return local_ids
def transfer_experiment(source):
    """
    Pull public experiments from source into current mytardis.

    The remote repository is first asked to identify itself over OAI-PMH and
    the base URL it reports must match ``source``.  For every listed record
    the public-access state, owner ACLs, METS document and experiment key are
    fetched; unless a local experiment already carries the same key, a
    placeholder experiment is created and populated from the METS document.

    :param source: base URL of the remote repository; it is compared with
        ``scheme://netloc`` of the base URL the remote reports for itself
    :returns: list of ids of the local experiments that were created.
        ``None`` is returned instead when the source has no matching
        records, when an experiment key is missing or empty, when the lock
        is already held, or when a duplicate experiment is found; ids
        collected before that point are not returned.
    :raises ReposReadError: the identity, ACL list or METS document could
        not be read from the source
    :raises OAIPMHError: the OAI-PMH client reported an error
    :raises BadAccessError: the source reports a different name, an
        experiment is not public, a reply could not be fetched/parsed, or
        the key schema / parameter name is missing locally
    :raises MetsParseError: ingest of the METS document failed
    """

    #TODO: Cleanup error messages
    #TODO: does not transfer licences as not part of METS format.
    #NOTE: As this is a pull we trust the data from the other tardis
    # Check identity of the feed
    from oaipmh.client import Client
    from oaipmh import error
    from oaipmh.metadata import MetadataRegistry, oai_dc_reader

    from django.core.cache import cache
    from django.utils.hashcompat import md5_constructor as md5

    # The lock id consists of the task name and the MD5 digest of a fixed
    # token, so one lock is shared by every source.
    cache_key = md5("token").hexdigest()
    lock_id = "%s-lock-%s" % ("consume_experiment", cache_key)
    LOCK_EXPIRE = 60 * 5

    def acquire_lock():
        # cache.add fails if the key already exists
        return cache.add(lock_id, "true", LOCK_EXPIRE)

    def release_lock():
        # memcache delete is very slow, but we have to use it to take
        # advantage of using add() for atomic locking
        return cache.delete(lock_id)

    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    source_url = "%s/apps/oaipmh/?verb=Identify" % source

    client = Client(source_url, registry)
    try:
        identify = client.identify()
    except AttributeError as e:
        msg = "Error reading repos identity: %s:%s" % (source, e)
        logger.error(msg)
        raise ReposReadError(msg)
    except error.ErrorBase as e:
        msg = "OAIPMH error: %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except URLError as e:
        logger.error(e)
        raise
    repos = identify.baseURL()
    import urlparse
    repos_url = urlparse.urlparse(repos)
    dest_name = "%s://%s" % (repos_url.scheme, repos_url.netloc)
    if dest_name != source:
        msg = "Source directory reports incorrect name: %s" % dest_name
        logger.error(msg)
        raise BadAccessError(msg)

    # Get list of public experiments at sources
    registry = MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    client = Client(
        source + "/apps/oaipmh/?verb=ListRecords&metadataPrefix=oai_dc",
        registry)
    try:
        exps_metadata = [
            meta for (header, meta, extra)
            in client.listRecords(metadataPrefix='oai_dc')
        ]
    except AttributeError as e:
        msg = "Error reading experiment %s" % e
        logger.error(msg)
        raise OAIPMHError(msg)
    except error.NoRecordsMatchError as e:
        msg = "no public records found on source %s" % e
        logger.warning(msg)
        return

    local_ids = []
    for exp_metadata in exps_metadata:
        exp_id = exp_metadata.getField('identifier')[0]
        user = exp_metadata.getField('creator')[0]

        found_user = _get_or_create_user(source, user)

        # make sure experiment is publicish
        try:
            xmldata = getURL("%s/apps/reposproducer/expstate/%s/" %
                             (source, exp_id))
        except HTTPError:
            msg = "cannot get public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        try:
            exp_state = json.loads(xmldata)
        except ValueError:
            msg = "cannot parse public state of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if exp_state not in (Experiment.PUBLIC_ACCESS_FULL,
                             Experiment.PUBLIC_ACCESS_METADATA):
            # The message needs a placeholder: formatting a string without
            # one raised TypeError instead of the intended BadAccessError.
            msg = 'cannot ingest private experiment %s' % exp_id
            logger.error(msg)
            raise BadAccessError(msg)

        # Get the usernames of isOwner django_user ACLs for the experiment
        try:
            xmldata = getURL("%s/apps/reposproducer/acls/%s/" %
                             (source, exp_id))
        except HTTPError:
            msg = "Cannot get acl list of experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)
        try:
            acls = json.loads(xmldata)
        except ValueError:
            msg = "cannot parse acl list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        owners = []
        for acl in acls:
            # FIXME: skips all other types of acl for now
            if acl['pluginId'] == 'django_user' and acl['isOwner']:
                user = _get_or_create_user(source, acl['entityId'])
                owners.append(user.username)

        # Get the METS for the experiment
        try:
            metsxml = getURL("%s/experiment/metsexport/%s/?force_http_urls" %
                             (source, exp_id))
        except HTTPError:
            msg = "cannot get METS for experiment %s" % exp_id
            logger.error(msg)
            raise ReposReadError(msg)

        # load schema and parametername for experiment keys
        try:
            key_schema = Schema.objects.get(namespace=settings.KEY_NAMESPACE)
        except Schema.DoesNotExist:
            msg = "No ExperimentKeyService Schema found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            key_name = ParameterName.objects.get(name=settings.KEY_NAME)
        except ParameterName.DoesNotExist:
            msg = "No ExperimentKeyService ParameterName found"
            logger.error(msg)
            raise BadAccessError(msg)

        try:
            xmldata = getURL("%s/apps/reposproducer/key/%s/" %
                             (source, exp_id))
        except HTTPError:
            msg = "cannot get key of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not xmldata:
            logger.warning(
                "Unable to retrieve experiment %s key.  Will try again later" %
                exp_id)
            return

        try:
            key_value = json.loads(xmldata)
        except ValueError:
            msg = "cannot parse key list of experiment %s" % exp_id
            logger.error(msg)
            raise BadAccessError(msg)
        if not key_value:
            logger.warning(
                "Unable to retrieve experiment %s key value.  Will try again later"
                % exp_id)
            return

        logger.debug("retrieved key %s from experiment %s" %
                     (key_value, exp_id))
        exps = Experiment.objects.all()

        if not acquire_lock():
            logger.warning("another worker has access to consume experiment")
            return

        # The duplicate check and the creation of the placeholder happen
        # under the lock; try/finally guarantees the lock is released even
        # if a query or save() raises.
        try:
            duplicate_exp = 0
            for exp in exps:
                params = ExperimentParameter.objects.filter(
                    name=key_name,
                    parameterset__schema=key_schema,
                    parameterset__experiment=exp)
                if params.count() >= 1:
                    key = params[0].string_value
                    if key == key_value:
                        duplicate_exp = exp.id
                        break

            if duplicate_exp:
                logger.warning(
                    "Found duplicate experiment form %s exp %s to  exp %s" %
                    (source, exp_id, duplicate_exp))
                return

            # TODO: Need someway of updating and existing experiment.
            # Problem is that copy will have different id from original, so
            # need unique identifier to allow matching

            # We have now pulled everything we need from producer and are
            # ready to create experiment.

            # Make placeholder experiment and ready metadata
            placeholder = Experiment(
                title='Placeholder Title',
                approved=True,
                created_by=found_user,
                public_access=exp_state,
                locked=False  # so experiment can then be altered.
            )
            placeholder.save()
            # NOTE(review): the key itself is not stored here (that code was
            # disabled); presumably the METS ingest below stores it --
            # confirm.
        finally:
            release_lock()

        local_id = placeholder.id
        filename = path.join(placeholder.get_or_create_directory(),
                             'mets_upload.xml')
        with open(filename, 'wb+') as f:
            f.write(metsxml)

        # Ingest this experiment META data and isOwner ACLS
        try:
            eid, sync_path = _registerExperimentDocument(filename=filename,
                                                         created_by=found_user,
                                                         expid=local_id,
                                                         owners=owners)
        except Exception:
            # FIXME: what errors can mets return?
            # FIXME: if METS parse fails then we should go back and delete
            # the placeholder experiment
            msg = '=== processing experiment %s: FAILED!' % local_id
            # logger.exception keeps the traceback of the original failure.
            logger.exception(msg)
            raise MetsParseError(msg)
        logger.info('=== processing experiment %s: DONE' % local_id)

        exp = Experiment.objects.get(id=eid)

        # so that tardis does not copy the data
        for datafile in exp.get_datafiles():
            datafile.stay_remote = True
            datafile.save()

        # FIXME: reverse lookup of URLs seem quite slow.
        # TODO: put this information into specific metadata schema attached
        # to experiment
        exp.description += get_audit_message(source, exp_id)
        exp.save()

        local_ids.append(local_id)
    return local_ids
예제 #5
0
def register_experiment_ws_xmldata(request):
    """Register an experiment from an uploaded METS/XML file.

    On a POST with a valid form the uploaded ``xmldata`` file is stored in
    the directory of a newly created placeholder experiment and ingested
    with ``_registerExperimentDocument``.  When ``from_url`` is supplied the
    ``received_remote`` signal is sent as well.

    :param request: the incoming HTTP request
    :returns: an ``HttpResponse`` (status 200) whose body is the new
        experiment id and whose ``Location`` header points at the experiment
        view; the result of ``return_response_error`` when authentication or
        ingest fails; otherwise (non-POST request or invalid form) the
        rendered registration form.
    """
    status = ''
    if request.method == 'POST':  # If the form has been submitted...

        # A form bound to the POST data
        form = RegisterExperimentForm(request.POST, request.FILES)
        if form.is_valid():  # All validation rules pass

            xmldata = request.FILES['xmldata']
            username = form.cleaned_data['username']
            origin_id = form.cleaned_data['originid']
            from_url = form.cleaned_data['from_url']

            user = auth_service.authenticate(request=request,
                                             authMethod=localdb_auth_key)
            # Reject both failed authentication and inactive accounts.
            if not user or not user.is_active:
                return return_response_error(request)

            e = Experiment(
                title='Placeholder Title',
                approved=True,
                created_by=user,
                )
            e.save()
            eid = e.id

            filename = path.join(e.get_or_create_directory(),
                                 'mets_upload.xml')
            # The context manager closes the file even if a write fails.
            with open(filename, 'wb+') as f:
                for chunk in xmldata.chunks():
                    f.write(chunk)

            logger.info('=== processing experiment: START')
            owners = request.POST.getlist('experiment_owner')
            try:
                _registerExperimentDocument(filename=filename,
                                            created_by=user,
                                            expid=eid,
                                            owners=owners,
                                            username=username)
            except Exception:
                logger.exception('=== processing experiment %s: FAILED!' % eid)
                return return_response_error(request)
            logger.info('=== processing experiment %s: DONE' % eid)

            if from_url:
                logger.debug('=== sending file request')
                logger.info('Sending received_remote signal')
                from tardis.tardis_portal.signals import received_remote
                received_remote.send(sender=Experiment,
                                     instance=e,
                                     uid=origin_id,
                                     from_url=from_url)

            response = HttpResponse(str(eid), status=200)
            response['Location'] = request.build_absolute_uri(
                '/experiment/view/' + str(eid))
            return response
    else:
        form = RegisterExperimentForm()  # An unbound form

    # Reached for a plain GET and for a POST whose form did not validate:
    # show the (possibly error-annotated) form again.
    c = Context({
        'form': form,
        'status': status,
        'subtitle': 'Register Experiment',
        'searchDatafileSelectionForm': getNewSearchDatafileSelectionForm()})
    return HttpResponse(render_response_index(request,
                        'tardis_portal/register_experiment.html', c))