Example #1
0
    def test_20_schema_info_parsing(self):
        """Retrieve the correct dataset from schema info, using File Ids"""
        # The docstring above is also the banner text handed to msgt().
        msgt(self.test_20_schema_info_parsing.__doc__)

        stored_id = schema_test_data.schema_info_01_file_id
        expected_suffix = str(stored_id)

        # Look the file up twice: first with the id exactly as it appears in
        # the test data, then with that same id converted to a string.
        for candidate_id in (stored_id, str(stored_id)):
            file_resp = DataverseManifestParams.get_file_specific_schema_info(
                schema_test_data.schema_info_01,
                file_id=candidate_id,
                file_persistent_id=schema_test_data.schema_info_01_file_pid)

            self.assertTrue(file_resp.success is True)
            self.assertTrue('contentUrl' in file_resp.data)

            # The returned content url is expected to end with the file id
            content_url = file_resp.data['contentUrl']
            self.assertTrue(content_url.endswith(expected_suffix))
Example #2
0
    def __init__(self, manifest_params, user):
        """
        Validate the incoming parameters and user, then process the request.

        manifest_params - Django request.GET or python dict
        user - the logged in user; must be an OpenDPUser object

        If the manifest params are in error, or the user is not an
        OpenDPUser, an error message is recorded via add_err_msg() and
        process_dv_request() is not called.
        """
        self.mparams = DataverseManifestParams(manifest_params)

        # Defined before any early return so that the attribute always
        # exists, even when validation fails below.
        self.user = None

        # In memory data
        self.user_info = None
        self.schema_info = None
        self.schema_info_for_file = None
        self.ddi_info = None

        # References to Django model instances
        self.dataverse_user = None
        self.dataverse_file_info = None

        if self.mparams.has_error():
            self.add_err_msg(self.mparams.get_error_message())
            return

        if not isinstance(user, OpenDPUser):
            self.add_err_msg('User must be an OpenDPUser object')
            return

        self.user = user

        self.process_dv_request()
Example #3
0
    def test_30_schema_info_parsing_bad_id(self):
        """Bad File Id used to retrieve data from schema info"""
        # The docstring above is also the banner text handed to msgt().
        msgt(self.test_30_schema_info_parsing_bad_id.__doc__)

        # Ids that should NOT be found in the schema info:
        # first a bad id given as a string, then a bad id given as an int.
        for bad_file_id in ('63', 99999):
            file_resp = DataverseManifestParams.get_file_specific_schema_info(
                schema_test_data.schema_info_01,
                file_id=bad_file_id,
                file_persistent_id=schema_test_data.schema_info_01_file_pid)

            self.assertTrue(file_resp.success is False)

            # The error message is expected to mention the offending id
            self.assertTrue(str(bad_file_id) in file_resp.message)
Example #4
0
def view_dataverse_incoming_1(request):
    """
    Render a page showing what was derived from the incoming Dataverse params.

    The template context always holds the title, subtitle and the raw GET
    key/value pairs. If the manifest params are in error, the error message
    is added; otherwise the user info and schema.org results are added.
    """
    context = {
        'title': 'Process Incoming Params',
        'subtitle': 'Example 1: get user info, schema',
        'incoming_params': list(request.GET.items()),
    }

    mparams = DataverseManifestParams(request.GET)

    if not mparams.has_error():
        # Retrieve user info
        context['user_info'] = mparams.get_user_info()

        # Retrieve dataset citation (JSON-LD)
        context['schema_info'] = mparams.get_schema_org()
    else:
        context['manifest_param_error'] = mparams.get_error_message()

    return render(request, 'dataverses/view_mock_incoming_1.html', context)
Example #5
0
class DataverseRequestHandler(BasicErrCheck):
    """
    Process an incoming Dataverse request for a logged in user.

    Steps, in order (each one stops the process if it records an error):
      1. validate the manifest params and the user
      2. retrieve the user info and schema.org info through self.mparams
      3. get-or-create/update the DataverseUser
      4. get-or-create the DataverseFileInfo

    Errors are recorded with add_err_msg(); callers check has_error().
    """

    def __init__(self, manifest_params, user):
        """
        Validate the incoming parameters and user, then process the request.

        manifest_params - Django request.GET or python dict
        user - the logged in user; must be an OpenDPUser object
        """
        self.mparams = DataverseManifestParams(manifest_params)

        # Defined before any early return so that the attribute always
        # exists, even when validation fails below.
        self.user = None

        # In memory data
        self.user_info = None
        self.schema_info = None
        self.schema_info_for_file = None
        self.ddi_info = None

        # References to Django model instances
        self.dataverse_user = None
        self.dataverse_file_info = None

        if self.mparams.has_error():
            self.add_err_msg(self.mparams.get_error_message())
            return

        if not isinstance(user, OpenDPUser):
            self.add_err_msg('User must be an OpenDPUser object')
            return

        self.user = user

        self.process_dv_request()

    def process_dv_request(self):
        """
        Main function that walks through the process.

        Returns None; the outcome is read from has_error() and from the
        attributes populated by the individual steps.
        """
        if self.has_error():
            return

        # Retrieve minimal data to do work
        #
        if not self.retrieve_user_info():
            return

        if not self.retrieve_schema_org_info():
            return

        # DDI...

        # Yes, we have all the necessary data, start updating models/tables
        #
        if not self.update_dataverse_user_info():
            return

        if not self.update_dataverse_file_info():
            return

    def retrieve_user_info(self):
        """
        Use the DV API to retrieve user info.

        On success, stores the value under the "data" key of the API result
        in self.user_info and returns True. Otherwise records an error
        message and returns False.
        """
        if self.has_error():
            return False

        user_info = self.mparams.get_user_info()
        if not user_info.success:
            self.add_err_msg(user_info.message)
            return False

        if isinstance(user_info.data, dict):
            if 'data' in user_info.data:
                self.user_info = user_info.data.get('data')
                return True
            user_msg = '"data" key not found in user information from Dataverse API'
        else:
            user_msg = 'user_info.data must be a Python dict'

        self.add_err_msg(user_msg)
        return False

    def retrieve_schema_org_info(self):
        """
        Use the DV API to retrieve schema.org info about the dataset.

        On success, stores the full JSON-LD in self.schema_info and the file
        specific portion in self.schema_info_for_file, then returns True.
        Otherwise records an error message and returns False.
        """
        if self.has_error():
            return False

        # (1) Retrieve the JSON LD info
        #
        # NOTE(review): the result is used like an HTTP response
        # (status_code, json()) but also read for a "message" attribute;
        # confirm the returned type provides all three.
        schema_info = self.mparams.get_schema_org()
        if schema_info.status_code >= 400:
            self.add_err_msg(schema_info.message)
            return False
        self.schema_info = schema_info.json()

        # (2) Retrieve the file specific info from the JSON-LD
        #
        file_info = self.mparams.retrieve_file_specific_info(self.schema_info)
        if not file_info.success:
            self.add_err_msg(file_info.message)
            return False

        self.schema_info_for_file = file_info.data
        return True

    def update_dataverse_user_info(self):
        """
        Create or update the DataverseUser related to the OpenDP user.

        The DataverseUser is looked up (or created) by the logged in user,
        the Dataverse installation and the persistent user id; the email and
        first/last name are then refreshed from self.user_info and saved.

        Returns True on success; records an error and returns False if the
        persistent user id is missing from the user info.
        """
        if self.has_error():
            return False

        dv_persistent_id = self.user_info.get(dv_static.DV_PERSISTENT_USER_ID)
        if not dv_persistent_id:
            user_msg = (f'Could not find "{dv_static.DV_PERSISTENT_USER_ID}"'
                        f' in the Dataverse user info.')
            self.add_err_msg(user_msg)
            return False

        self.dataverse_user, _created = DataverseUser.objects.get_or_create(
            user=self.user,     # logged in user
            dv_installation=self.mparams.registerd_dataverse,   # from GET request
            persistent_id=dv_persistent_id)     # from User Info

        # update params, if needed
        self.dataverse_user.email = self.user_info.get(dv_static.DV_EMAIL)
        self.dataverse_user.first_name = self.user_info.get(dv_static.DV_FIRST_NAME)
        self.dataverse_user.last_name = self.user_info.get(dv_static.DV_LAST_NAME)

        self.dataverse_user.save()
        return True

    def update_dataverse_file_info(self):
        """
        Retrieve or create a DataverseFileInfo object.

        The lookup is by source, Dataverse installation and file id; the
        remaining values are only applied when a new object is created.
        Stores the object in self.dataverse_file_info.

        Returns True on success, False if an error was already recorded.
        """
        # Same guard as the other steps: do not touch the database once an
        # error has been recorded.
        if self.has_error():
            return False

        query_params = dict(source=DataverseFileInfo.SourceChoices.Dataverse,
                            dv_installation=self.mparams.registerd_dataverse,
                            dataverse_file_id=self.mparams.fileId)

        # Fall back to a generic name when the schema info has no name entry
        default_name = f'DV file {self.mparams.filePid}'
        defaults = dict(creator=self.user,  # logged in user, OpenDP user
                        name=self.schema_info_for_file.get(dv_static.SCHEMA_KEY_NAME,
                                                           default_name),
                        dataset_doi=self.mparams.datasetPid,
                        file_doi=self.mparams.filePid if self.mparams.filePid else '')

        dv_file_info, _created = DataverseFileInfo.objects.get_or_create(
            **query_params, defaults=defaults)

        self.dataverse_file_info = dv_file_info

        return True
Example #6
0
    def create(self, request, *args, **kwargs):
        """
        Get a Dataverse File corresponding to a user_id (UUID)
        and values from a DataverseHandoff object

        Reads 'handoff_id' and 'creator' from request.data. An existing
        DataverseFileInfo for the handoff's file id and the user's Dataverse
        installation is reused; otherwise a new one is built. When neither
        schema field is populated, the schema.org JSON-LD is fetched from
        Dataverse, the file specific portion is extracted, and the object
        is saved.

        Responses:
          - 201 with {'success': True, 'data': <serialized file info>}
          - 423 if the file belongs to a different user
          - 400 if the Dataverse url is not set, or either schema step fails
        """
        def error_response(message, http_status=status.HTTP_400_BAD_REQUEST):
            """Build the failure payload shared by all error exits."""
            return Response({'success': False, 'message': message},
                            status=http_status)

        # TODO: changing user_id to creator to match DB, we should standardize this naming convention
        handoff_id = request.data.get('handoff_id')
        user_id = request.data.get('creator')

        # NOTE(review): both results are used directly as model instances
        # below; confirm how get_object_or_error_response signals "not found".
        handoff = get_object_or_error_response(DataverseHandoff,
                                               object_id=handoff_id)
        dataverse_user = get_object_or_error_response(DataverseUser,
                                                      object_id=user_id)

        try:
            file_info = DataverseFileInfo.objects.get(
                dataverse_file_id=handoff.fileId,
                dv_installation=dataverse_user.dv_installation)
            if file_info.creator != request.user:
                # and depositor_setup_info is step 600 "epsilon set" and analysis_plan does not exist:
                # then user can continue to work on file
                # else:
                #   raise FileLockedException()
                return error_response('File is locked by another user',
                                      status.HTTP_423_LOCKED)
        except DataverseFileInfo.DoesNotExist:
            # Not saved yet; it is only saved after the schema info has
            # been retrieved successfully below.
            file_info = DataverseFileInfo(
                dv_installation=dataverse_user.dv_installation,
                dataverse_file_id=handoff.fileId,
                dataset_doi=handoff.datasetPid,
                file_doi=handoff.filePid,
                dataset_schema_info=None,
                file_schema_info=None,
                creator=dataverse_user.user)

        # If file info doesn't exist, call to Dataverse to get the data and
        # populate the relevant fields
        if not (file_info.dataset_schema_info or file_info.file_schema_info):
            site_url = handoff.dv_installation.dataverse_url
            if not site_url:
                # shouldn't happen....
                return error_response('The Dataverse url has not been set.')

            # (1) Retrieve the JSON LD info
            client = DataverseClient(site_url, handoff.apiGeneralToken)
            schema_org_resp = client.get_schema_org(handoff.datasetPid)
            if schema_org_resp.status_code >= 400:
                return error_response(schema_org_resp.message)

            # (2) Retrieve the file specific info from the JSON-LD
            #
            schema_org_content = schema_org_resp.json()
            file_schema_resp = DataverseManifestParams.get_file_specific_schema_info(
                schema_org_content, handoff.fileId, handoff.filePid)
            if not file_schema_resp.success:
                return error_response(file_schema_resp.message)

            # Update the DataverseFileInfo object
            #
            file_info.creator = dataverse_user.user
            file_info.dataset_schema_info = schema_org_content
            file_info.file_schema_info = file_schema_resp.data
            # This will fail if the dataset_schema_info is malformed, use DOI as backup just in case:
            file_info.name = file_info.dataset_schema_info.get(
                'name', file_info.dataset_doi)

            # Save the DataverseFileInfo updates
            file_info.save()

        serializer = DataverseFileInfoSerializer(file_info,
                                                 context={'request': request})
        return Response({'success': True, 'data': serializer.data},
                        status=status.HTTP_201_CREATED)
Example #7
0
    def test_010_dv_params(self):
        """(10) Basic check of incoming DV params"""
        # The docstring above is also the banner text handed to msgt().
        msgt(self.test_010_dv_params.__doc__)

        print('1. Retrieve mock params')
        self.assertTrue(self.mock_params is not None)

        print('2. Are all params there? (should be yes)')
        incoming = self.mock_params.as_dict()
        manifest = DataverseManifestParams(incoming)
        self.assertTrue(manifest.has_error() is False)

        print('3. Test with missing param. fileId')
        # Drop one required param; the error message should name it and use
        # the singular wording.
        incoming.pop('fileId')
        manifest = DataverseManifestParams(incoming)
        self.assertTrue(manifest.has_error())

        err_msg = manifest.get_error_message()
        for fragment in ('required parameter is missing', 'fileId'):
            self.assertTrue(fragment in err_msg)
        print(manifest.get_error_message())

        print('4. Test with missing params. fileId, apiGeneralToken')
        # Drop a second required param; the error message should name both
        # and switch to the plural wording.
        incoming.pop('apiGeneralToken')
        manifest = DataverseManifestParams(incoming)
        self.assertTrue(manifest.has_error())

        err_msg = manifest.get_error_message()
        for fragment in ('required parameters are missing',
                         'fileId',
                         'apiGeneralToken'):
            self.assertTrue(fragment in err_msg)
        print(manifest.get_error_message())