Example no. 1
    def process_submission_as_contract(self, submission_dict, project):
        try:
            partner_accession = submission_dict['submitting_institution']
        except KeyError:
            raise DatasetImportError(
                data='Submitting institute info missing. Aborting import!')

        try:
            partner = Partner.objects.get(elu_accession=partner_accession)
        except Partner.DoesNotExist:
            raise DatasetImportError(
                data='Partner institute with accession {} not found in DB. '
                     'Aborting import.'.format(partner_accession))

        contract = None
        if self.is_elixir_submission(submission_dict):
            try:
                contract = Contract.objects.get(
                    project=project, partners_roles__partner=partner)
            except Contract.DoesNotExist:
                contract = Contract.objects.create(project=project)
                contract.company_roles.add(GDPRRole["joint_controller"])
                contract.add_partner_with_role(partner,
                                               GDPRRole["joint_controller"])
                contract.local_custodians.set(project.local_custodians.all())
                contract.save()
        # Non-Elixir submissions carry no joint-controller contract, so None is returned.
        return contract
Example no. 2
    def process_data_locations(self, dataset, dataset_dict):
        data_locations = []
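        # Map platform names found in the submission JSON to StorageResource slugs;
        # names not present in the mapping are passed through unchanged.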
        backend_mapping = {
            'aspera': 'lcsb-aspera',
            'atlas': 'atlas-server',
            'atlas_personal': 'atlas-server',
            'atlas_project': 'atlas-server',
            'hpc_backup_gaia': 'gaia-cluster',
            'hpc_gaia_home': 'gaia-cluster',
            'hpc_gaia_project': 'gaia-cluster',
            'hpc_gaia_work': 'gaia-cluster',
            'hpc_isilon': 'hpc-isilon',
            'lcsb_desktop': 'uni-desktop',
            'external storage  (e.g. hard disk, dvd)': 'external-device',
            'lcsb_group_server': 'group-server',
            'lcsb_laptop': 'uni-laptop',
            'owncloud': 'lcsb-owncloud',
            'personal_laptop': 'personal-laptop',
            'sample-storage': 'sample-storage',
            'other': 'other'
        }
        if 'storages' in dataset_dict:

            for storage_location_dict in dataset_dict['storages']:
                backend_name = storage_location_dict['platform'].lower().strip()
                backend_name = backend_mapping.get(backend_name, backend_name)
                if not backend_name:
                    raise DatasetImportError(
                        data=f'Not a proper backend name: "{backend_name}".')
                try:
                    backend = StorageResource.objects.get(slug=backend_name)
                except StorageResource.DoesNotExist:
                    raise DatasetImportError(
                        data=f'Cannot find StorageResource with slug: "{backend_name}".')
                category = self.process_category(storage_location_dict)

                location_delimited = '\n'.join(
                    storage_location_dict['locations'])

                dl = DataLocation.objects.create(
                    category=category,
                    backend=backend,
                    dataset=dataset,
                    location_description=location_delimited)
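                # ACL notes from the submission are attached to the dataset's
                # "master" storage locations (queried below), not to the new location.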
                master_locations = DataLocation.objects.filter(
                    category=StorageLocationCategory.master, dataset=dataset)

                acl_policy_description = self.process_acl_info(
                    storage_location_dict)
                if acl_policy_description:
                    acc = Access.objects.create(
                        dataset=dataset, access_notes=acl_policy_description)
                    acc.defined_on_locations.set(master_locations)
                    acc.save()
                data_locations.append(dl)
        return data_locations
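A minimal sketch of the 'storages' entry this method expects, inferred from the keys it reads; the path value is an illustrative assumption, and the category/ACL fields consumed by process_category and process_acl_info are not shown:

    dataset_dict = {
        'storages': [
            {
                'platform': 'hpc_gaia_project',           # normalised, then mapped to the 'gaia-cluster' slug
                'locations': ['/work/projects/example'],  # hypothetical path; joined with newlines into location_description
            },
        ],
    }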
Example no. 3
    def process_submission_as_dataset(self, submission_dict, project):
        try:
            elu_accession = submission_dict['elu_accession']
        except KeyError:
            raise DatasetImportError(
                data='submission without accession number')

        dataset = Dataset.objects.filter(title=elu_accession.strip()).first()
        if dataset is not None:
            logger.warning(
                "Dataset with title '{}' already found. It will be updated.".format(
                    elu_accession.strip()))
        else:

            dataset = Dataset.objects.create(title=elu_accession.strip())

        dataset.project = project

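        # Preserve the submission metadata as free text in the dataset's comments field.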
        created_on_str = submission_dict['created_on']
        title = submission_dict['title']
        scope_str = 'Elixir' if submission_dict['scope'] == 'e' else 'LCSB Collaboration'
        local_project_str = submission_dict.get('local_project', '')
        dataset.comments = "ELU Accession: {}\nTitle: {}\nCreated On: {}\nScope: {}\nSubmitted to Project: {}".format(
            elu_accession, title, created_on_str, scope_str, local_project_str)

        local_custodians = DatasetsImporter.process_local_custodians(
            submission_dict)
        if local_custodians:
            dataset.local_custodians.set(local_custodians)
        dataset.save()
        return dataset
Example no. 4
    def process_submission_as_dataset(self, submission_dict, project):
        try:
            elu_accession = submission_dict['elu_accession']
        except KeyError:
            raise DatasetImportError(data='submission without accession number')

        dataset = Dataset.objects.filter(title=elu_accession.strip()).first()
        if dataset is not None:
            msg = f"Dataset with title '{elu_accession.strip()}' already found. It will be updated."
            logger.warning(msg)
        else:

            dataset = Dataset.objects.create(title=elu_accession.strip())

        dataset.project = project

        created_on_str = submission_dict['created_on']
        title = submission_dict['name']
        scope_str = 'Elixir' if submission_dict['scope'] == 'e' else 'LCSB Collaboration'
        local_project_str = submission_dict.get('local_project', '')
        dataset.comments = f"ELU Accession: {elu_accession}\nTitle: {title}\nCreated On: {created_on_str}\nScope: {scope_str}\nSubmitted to Project: {local_project_str}"

        local_custodians, local_personnel, external_contacts = self.process_contacts(submission_dict)

        if local_custodians:
            dataset.local_custodians.set(local_custodians, clear=True)

        dataset.save()

        return dataset
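For reference, a sketch of the submission fields read by the two variants above and by process_submission_as_contract; all values are illustrative, and note that Example no. 3 reads the title from 'title' while this variant reads 'name':

    submission_dict = {
        'elu_accession': 'ELU_DS_1',           # illustrative accession
        'name': 'Example submission title',    # 'title' in Example no. 3
        'created_on': '2020-01-01',
        'scope': 'e',                          # 'e' marks an Elixir submission
        'local_project': 'Example project',
        'submitting_institution': 'ELU_I_1',   # partner elu_accession, used in Example no. 1
    }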
Example no. 5
    def process_datatypes(self, datadec_dict):
        datatypes = []
        for datatype_str in datadec_dict.get('data_types', []):
            datatype_str = datatype_str.strip()
            try:
                # get_or_create creates missing data types on the fly, so DoesNotExist
                # is not expected here; the handler is kept as a safeguard for the import.
                datatype, _ = DataType.objects.get_or_create(name=datatype_str)
            except DataType.DoesNotExist:
                self.logger.error('Import failed')
                raise DatasetImportError(data=f'Cannot find data type: "{datatype_str}".')
            datatypes.append(datatype)
        return datatypes
Example no. 6
    def process_datadeclaration(self, datadec_dict, dataset):
        try:
            title = datadec_dict['title']
            title_to_show = title.strip()
        except KeyError:
            raise DatasetImportError(data='Data declaration title missing')

        try:
            datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset)
        except DataDeclaration.DoesNotExist:
            datadec = None

        if datadec:
            msg = f"Data declaration with title '{title_to_show}' already found. It will be updated."
            self.logger.warning(msg)
        else:
            datadec = DataDeclaration.objects.create(title=title, dataset=dataset)

        if 'source_study' not in datadec_dict or len(datadec_dict.get('source_study')) == 0:
            self.logger.warning(f"Data declaration with title '{title_to_show}' has no `source_study` set - there will be a problem processing study/cohort data.")

        datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False)
        datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
        datadec.deidentification_method = self.process_deidentification_method(datadec_dict)
        datadec.subjects_category = self.process_subjects_category(datadec_dict)
        datadec.special_subjects_description = datadec_dict.get('special_subjects_description', None)
        datadec.other_external_id = datadec_dict.get('other_external_id', None)
        datadec.share_category = self.process_access_category(datadec_dict)
        datadec.access_procedure = datadec_dict.get('access_procedure', '')
        datadec.consent_status = self.process_constent_status(datadec_dict)
        datadec.comments = datadec_dict.get('source_notes', None)
        datadec.embargo_date = datadec_dict.get('embargo_date', None)
        datadec.storage_duration_criteria = datadec_dict.get("storage_duration_criteria", None)
        datadec.end_of_storage_duration = datadec_dict.get("storage_end_date", None)
        if 'data_types' in datadec_dict:
            datadec.data_types_received.set(self.process_datatypes(datadec_dict))

        # if 'contract_obj' not in kwargs:
        #     if 'source_collaboration' in datadec_dict:
        #         datadec.contract = self.process_source_contract(dataset, datadec_dict)
        # else:
        #     datadec.contract = kwargs.pop('contract_obj')
        # if datadec.contract:
        #     datadec.partner = datadec.contract.partners.first()
        self.process_use_restrictions(datadec, datadec_dict)
        datadec.dataset = dataset
        datadec.save()
        datadec.updated = True

        return datadec, datadec_dict.get('source_study')
Example no. 7
    def process_dataset(self, dataset_dict):
        try:
            title = dataset_dict['name']
        except KeyError:
            raise DatasetImportError(data='dataset without title')

        title = title.strip()

        try:
            dataset = Dataset.objects.get(title=title)
        except Dataset.DoesNotExist:
            dataset = None

        if dataset:
            self.logger.warning(
                "Dataset with title '{}' already found. It will be updated.".format(title))
        else:
            dataset = Dataset.objects.create(title=title)

        if 'project' in dataset_dict:
            dataset.project = self.process_project(dataset_dict['project'])

        dataset.sensitivity = dataset_dict.get('sensitivity', None)

        local_custodians, local_personnel, external_contacts = self.process_contacts(
            dataset_dict)

        if local_custodians:
            dataset.local_custodians.set(local_custodians, clear=True)

        data_locations = self.process_data_locations(dataset, dataset_dict)
        if data_locations:
            dataset.data_locations.set(data_locations, bulk=False)

        # users_with_access = self.process_user_acl(storage_location_dict)
        # if users_with_access:
        #     dl.users_with_access.set(users_with_access, bulk=False)
        # if 'storage_acl_notes' in storage_location_dict:
        #     dl.access_notes = storage_location_dict['storage_acl_notes']

        shares = self.process_transfers(dataset_dict, dataset)
        if shares:
            dataset.shares.set(shares, bulk=False)

        dataset.save()
        for local_custodian in local_custodians:
            local_custodian.assign_permissions_to_dataset(dataset)

        self.process_datadeclarations(dataset_dict, dataset)
Example no. 8
    def process_datadeclaration(self, datadec_dict, dataset):
        try:
            title = datadec_dict['title']
        except KeyError:
            raise DatasetImportError(data='Data declaration title missing')

        try:
            datadec = DataDeclaration.objects.get(title=title.strip(),
                                                  dataset=dataset)
        except DataDeclaration.DoesNotExist:
            datadec = None

        if datadec:
            self.logger.warning(
                "Data declaration with title '{}' already found. It will be updated.".format(title))
        else:
            datadec = DataDeclaration.objects.create(title=title,
                                                     dataset=dataset)

        datadec.has_special_subjects = datadec_dict.get(
            'has_special_subjects', False)
        datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
        datadec.deidentification_method = self.process_deidentification_method(
            datadec_dict)
        datadec.subjects_category = self.process_subjects_category(
            datadec_dict)
        datadec.special_subjects_description = datadec_dict.get(
            'special_subjects_description', None)
        datadec.other_external_id = datadec_dict.get('other_external_id', None)
        datadec.share_category = self.process_access_category(datadec_dict)
        datadec.consent_status = self.process_constent_status(datadec_dict)
        datadec.comments = datadec_dict.get('source_notes', None)

        if 'data_types' in datadec_dict:
            datadec.data_types_received.set(
                self.process_datatypes(datadec_dict))

        # if 'contract_obj' not in kwargs:
        #     if 'source_collaboration' in datadec_dict:
        #         datadec.contract = self.process_source_contract(dataset, datadec_dict)
        # else:
        #     datadec.contract = kwargs.pop('contract_obj')
        # if datadec.contract:
        #     datadec.partner = datadec.contract.partners.first()
        self.process_use_restrictions(datadec, datadec_dict)
        datadec.dataset = dataset
        datadec.save()
Example no. 9
    def process_study(self, study_dict):
        try:
            title = study_dict['title']
        except KeyError:
            raise DatasetImportError(data='study without title')

        description = study_dict.get('description', None)
        ethics_approval_exists = study_dict.get('ethics_approval_exists',
                                                False)
        ethics_notes = "The submitter confirms that an ethics approval exists for the data collection, sharing and \
        the purposes for which the data is shared." if ethics_approval_exists else None

        existing_project = Project.objects.filter(title=title).first()
        if existing_project is not None:
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            logger.warning(
                "Project with title '{}' already found. It will be imported again "
                "with timestamp {}.".format(title, timestamp))
            title = title + timestamp

        project = Project.objects.create(title=title,
                                         description=description,
                                         has_cner=ethics_approval_exists,
                                         cner_notes=ethics_notes)
        contacts = self.process_external_contacts(
            study_dict.get('contacts', []))

        if contacts:
            project.contacts.set(contacts)
            project.save()

        # study_types = self.process_studytypes(study_dict)
        # if study_types:
        #     project.study_types.set(study_types)
        #     project.save()

        return project
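A sketch of the study entry this method reads, with illustrative values; 'contacts' is handed to process_external_contacts:

    study_dict = {
        'title': 'Example study',
        'description': 'Optional free-text description',
        'ethics_approval_exists': True,   # stored as has_cner, with a fixed cner_notes text
        'contacts': [],
    }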
Example no. 10
    def process_json(self, dataset_dict):
        try:
            title = dataset_dict['name']
        except KeyError:
            raise DatasetImportError(data='dataset without title')

        title = title.strip()

        try:
            dataset = Dataset.objects.get(title=title)
        except Dataset.DoesNotExist:
            dataset = None

        if dataset:
            self.logger.warning(f"Dataset with title '{title}' already found. It will be updated.")
        else:
            dataset = Dataset.objects.create(title=title)

        if 'project' in dataset_dict and dataset_dict['project']:
            dataset.project = self.process_project(dataset_dict['project'])

        dataset.sensitivity = dataset_dict.get('sensitivity', None)

        local_custodians, local_personnel, external_contacts = self.process_contacts(dataset_dict.get("contacts", []))

        if local_custodians:
            dataset.local_custodians.set(local_custodians, clear=True)

        data_locations = self.process_data_locations(dataset, dataset_dict)
        if data_locations:
            dataset.data_locations.set(data_locations, bulk=False)

        # users_with_access = self.process_user_acl(storage_location_dict)
        # if users_with_access:
        #     dl.users_with_access.set(users_with_access, bulk=False)
        # if 'storage_acl_notes' in storage_location_dict:
        #     dl.access_notes = storage_location_dict['storage_acl_notes']

        shares = self.process_transfers(dataset_dict, dataset)
        if shares:
            dataset.shares.set(shares, bulk=False)

        dataset.save()
        dataset.updated = True
        for local_custodian in local_custodians:
            local_custodian.assign_permissions_to_dataset(dataset)

        studies_map = self.process_datadeclarations(dataset_dict, dataset)

        # Must be run after processing data declarations
        self.process_studies(dataset_dict, studies_map)

        # Must be run after processing data declarations
        legal_bases = self.process_legal_bases(dataset_dict, dataset)
        if legal_bases:
            dataset.legal_basis_definitions.set(legal_bases, bulk=False)

        dataset.save()

        return True