def process_submission_as_contract(self, submission_dict, project):
    try:
        partner_accession = submission_dict['submitting_institution']
    except KeyError:
        raise DatasetImportError(
            data='Submitting institute info missing. Aborting import!')
    try:
        partner = Partner.objects.get(elu_accession=partner_accession)
    except Partner.DoesNotExist:
        raise DatasetImportError(
            data='Partner institute with accession {} not found in DB. Aborting import.'.format(
                partner_accession))
    if self.is_elixir_submission(submission_dict):
        try:
            contract = Contract.objects.get(
                project=project,
                partners_roles__partner=partner)
        except Contract.DoesNotExist:
            contract = Contract.objects.create(project=project)
            contract.company_roles.add(GDPRRole["joint_controller"])
            contract.add_partner_with_role(partner, GDPRRole["joint_controller"])
            contract.local_custodians.set(project.local_custodians.all())
            contract.save()
        return contract
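# --- Hypothetical helper sketch (assumption, not shown in this module) ------
# is_elixir_submission is referenced above but not defined here. A minimal
# sketch, assuming the submission's 'scope' flag ('e' = Elixir) drives the
# decision, as it does in process_submission_as_dataset below.
def is_elixir_submission(self, submission_dict):
    return submission_dict.get('scope') == 'e'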
def process_data_locations(self, dataset, dataset_dict):
    data_locations = []
    backend_mapping = {
        'aspera': 'lcsb-aspera',
        'atlas': 'atlas-server',
        'atlas_personal': 'atlas-server',
        'atlas_project': 'atlas-server',
        'hpc_backup_gaia': 'gaia-cluster',
        'hpc_gaia_home': 'gaia-cluster',
        'hpc_gaia_project': 'gaia-cluster',
        'hpc_gaia_work': 'gaia-cluster',
        'hpc_isilon': 'hpc-isilon',
        'lcsb_desktop': 'uni-desktop',
        'external storage (e.g. hard disk, dvd)': 'external-device',
        'lcsb_group_server': 'group-server',
        'lcsb_laptop': 'uni-laptop',
        'owncloud': 'lcsb-owncloud',
        'personal_laptop': 'personal-laptop',
        'sample-storage': 'sample-storage',
        'other': 'other',
    }
    if 'storages' in dataset_dict:
        for storage_location_dict in dataset_dict['storages']:
            backend_name = storage_location_dict['platform'].lower().strip()
            backend_name = backend_mapping.get(backend_name, backend_name)
            if not backend_name:
                raise DatasetImportError(
                    data=f'Not a proper backend name: "{backend_name}".')
            try:
                backend = StorageResource.objects.get(slug=backend_name)
            except StorageResource.DoesNotExist:
                raise DatasetImportError(
                    data=f'Cannot find StorageResource with slug: "{backend_name}".')
            category = self.process_category(storage_location_dict)
            location_delimited = '\n'.join(storage_location_dict['locations'])
            dl = DataLocation.objects.create(
                category=category,
                backend=backend,
                dataset=dataset,
                location_description=location_delimited)
            master_locations = DataLocation.objects.filter(
                category=StorageLocationCategory.master,
                dataset=dataset)
            acl_policy_description = self.process_acl_info(storage_location_dict)
            if acl_policy_description:
                acc = Access.objects.create(
                    dataset=dataset,
                    access_notes=acl_policy_description)
                acc.defined_on_locations.set(master_locations)
                acc.save()
            data_locations.append(dl)
    return data_locations
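# --- Illustrative input (assumption, not part of the importer) --------------
# A hypothetical 'storages' entry that would satisfy the loop above. Only
# 'platform' and 'locations' are read directly; any ACL fields are handled by
# process_acl_info, which is not shown in this module.
EXAMPLE_STORAGE_PAYLOAD = {
    'storages': [
        {
            'platform': 'hpc_gaia_project',  # mapped to the 'gaia-cluster' slug above
            'locations': ['/work/projects/demo/raw', '/work/projects/demo/derived'],
        },
    ],
}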
def process_submission_as_dataset(self, submission_dict, project):
    try:
        elu_accession = submission_dict['elu_accession']
    except KeyError:
        raise DatasetImportError(data='submission without accession number')
    dataset = Dataset.objects.filter(title=elu_accession.strip()).first()
    if dataset is not None:
        logger.warning(
            "Dataset with title '{}' already found. It will be updated.".format(
                elu_accession.strip()))
    else:
        dataset = Dataset.objects.create(title=elu_accession.strip())
    dataset.project = project
    created_on_str = submission_dict['created_on']
    title = submission_dict['title']
    scope_str = 'Elixir' if submission_dict['scope'] == 'e' else 'LCSB Collaboration'
    local_project_str = submission_dict.get('local_project', '')
    dataset.comments = "ELU Accession: {}\nTitle: {}\nCreated On: {}\nScope: {}\nSubmitted to Project: {}".format(
        elu_accession, title, created_on_str, scope_str, local_project_str)
    local_custodians = DatasetsImporter.process_local_custodians(submission_dict)
    if local_custodians:
        dataset.local_custodians.set(local_custodians)
    dataset.save()
    return dataset
def process_submission_as_dataset(self, submission_dict, project):
    try:
        elu_accession = submission_dict['elu_accession']
    except KeyError:
        raise DatasetImportError(data='submission without accession number')
    dataset = Dataset.objects.filter(title=elu_accession.strip()).first()
    if dataset is not None:
        msg = f"Dataset with title '{elu_accession.strip()}' already found. It will be updated."
        logger.warning(msg)
    else:
        dataset = Dataset.objects.create(title=elu_accession.strip())
    dataset.project = project
    created_on_str = submission_dict['created_on']
    title = submission_dict['name']
    scope_str = 'Elixir' if submission_dict['scope'] == 'e' else 'LCSB Collaboration'
    local_project_str = submission_dict.get('local_project', '')
    dataset.comments = (
        f"ELU Accession: {elu_accession}\nTitle: {title}\nCreated On: {created_on_str}\n"
        f"Scope: {scope_str}\nSubmitted to Project: {local_project_str}")
    local_custodians, local_personnel, external_contacts = self.process_contacts(submission_dict)
    if local_custodians:
        dataset.local_custodians.set(local_custodians, clear=True)
    dataset.save()
    return dataset
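# --- Illustrative input (assumption, not part of the importer) --------------
# A minimal submission payload covering the keys read by the two submission
# handlers above. All values are invented for the example.
EXAMPLE_SUBMISSION_PAYLOAD = {
    'elu_accession': 'ELU_D_0001',
    'name': 'Demo cohort genotypes',
    'created_on': '2020-01-15',
    'scope': 'e',                          # 'e' -> Elixir, anything else -> LCSB Collaboration
    'local_project': 'Demo project',
    'submitting_institution': 'ELU_I_77',  # looked up via Partner.elu_accession
}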
def process_datatypes(self, datadec_dict):
    datatypes = []
    for datatype_str in datadec_dict.get('data_types', []):
        datatype_str = datatype_str.strip()
        # get_or_create never raises DataType.DoesNotExist, so the previous
        # try/except around this call was dead code and has been removed.
        datatype, _ = DataType.objects.get_or_create(name=datatype_str)
        datatypes.append(datatype)
    return datatypes
def process_datadeclaration(self, datadec_dict, dataset):
    try:
        title = datadec_dict['title']
    except KeyError:
        raise DatasetImportError(data='Data declaration title missing')
    try:
        datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset)
    except DataDeclaration.DoesNotExist:
        datadec = None
    if datadec:
        # NOTE: `title` is already a str in Python 3; encoding it to bytes
        # would only garble the log message, so it is used directly here.
        msg = f"Data declaration with title '{title}' already found. It will be updated."
        self.logger.warning(msg)
    else:
        datadec = DataDeclaration.objects.create(title=title, dataset=dataset)
    if 'source_study' not in datadec_dict or len(datadec_dict.get('source_study')) == 0:
        self.logger.warning(
            f"Data declaration with title '{title}' has no `source_study` set - "
            "there will be a problem processing study/cohort data.")
    datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False)
    datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
    datadec.deidentification_method = self.process_deidentification_method(datadec_dict)
    datadec.subjects_category = self.process_subjects_category(datadec_dict)
    datadec.special_subjects_description = datadec_dict.get('special_subjects_description', None)
    datadec.other_external_id = datadec_dict.get('other_external_id', None)
    datadec.share_category = self.process_access_category(datadec_dict)
    datadec.access_procedure = datadec_dict.get('access_procedure', '')
    datadec.consent_status = self.process_constent_status(datadec_dict)
    datadec.comments = datadec_dict.get('source_notes', None)
    datadec.embargo_date = datadec_dict.get('embargo_date', None)
    datadec.storage_duration_criteria = datadec_dict.get("storage_duration_criteria", None)
    datadec.end_of_storage_duration = datadec_dict.get("storage_end_date", None)
    if 'data_types' in datadec_dict:
        datadec.data_types_received.set(self.process_datatypes(datadec_dict))
    # if 'contract_obj' not in kwargs:
    #     if 'source_collaboration' in datadec_dict:
    #         datadec.contract = self.process_source_contract(dataset, datadec_dict)
    # else:
    #     datadec.contract = kwargs.pop('contract_obj')
    # if datadec.contract:
    #     datadec.partner = datadec.contract.partners.first()
    self.process_use_restrictions(datadec, datadec_dict)
    datadec.dataset = dataset
    datadec.save()
    datadec.updated = True
    return datadec, datadec_dict.get('source_study')
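# --- Illustrative input (assumption, not part of the importer) --------------
# A hypothetical data declaration entry. Every key below is one the method
# reads; permitted values for the helper-processed fields (de-identification,
# subjects category, access category, consent status) are defined in helpers
# not shown in this module.
EXAMPLE_DATADEC_PAYLOAD = {
    'title': 'Genotypes, cohort A',
    'source_study': 'Demo study',
    'has_special_subjects': False,
    'data_types': ['Genotype data', 'Phenotype data'],
    'access_procedure': 'Via data access committee',
    'storage_duration_criteria': 'Project duration plus 10 years',
}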
def process_dataset(self, dataset_dict):
    try:
        title = dataset_dict['name']
    except KeyError:
        raise DatasetImportError(data='dataset without title')
    title = title.strip()
    try:
        dataset = Dataset.objects.get(title=title)
    except Dataset.DoesNotExist:
        dataset = None
    if dataset:
        self.logger.warning(
            "Dataset with title '{}' already found. It will be updated.".format(title))
    else:
        dataset = Dataset.objects.create(title=title)
    if 'project' in dataset_dict:
        dataset.project = self.process_project(dataset_dict['project'])
    dataset.sensitivity = dataset_dict.get('sensitivity', None)
    local_custodians, local_personnel, external_contacts = self.process_contacts(dataset_dict)
    if local_custodians:
        dataset.local_custodians.set(local_custodians, clear=True)
    data_locations = self.process_data_locations(dataset, dataset_dict)
    if data_locations:
        dataset.data_locations.set(data_locations, bulk=False)
    # users_with_access = self.process_user_acl(storage_location_dict)
    # if users_with_access:
    #     dl.users_with_access.set(users_with_access, bulk=False)
    # if 'storage_acl_notes' in storage_location_dict:
    #     dl.access_notes = storage_location_dict['storage_acl_notes']
    shares = self.process_transfers(dataset_dict, dataset)
    if shares:
        dataset.shares.set(shares, bulk=False)
    dataset.save()
    for local_custodian in local_custodians:
        local_custodian.assign_permissions_to_dataset(dataset)
    self.process_datadeclarations(dataset_dict, dataset)
def process_datadeclaration(self, datadec_dict, dataset):
    try:
        title = datadec_dict['title']
    except KeyError:
        raise DatasetImportError(data='Data declaration title missing')
    try:
        datadec = DataDeclaration.objects.get(title=title.strip(), dataset=dataset)
    except DataDeclaration.DoesNotExist:
        datadec = None
    if datadec:
        self.logger.warning(
            "Data declaration with title '{}' already found. It will be updated.".format(title))
    else:
        datadec = DataDeclaration.objects.create(title=title, dataset=dataset)
    datadec.has_special_subjects = datadec_dict.get('has_special_subjects', False)
    datadec.data_types_notes = datadec_dict.get('data_type_notes', None)
    datadec.deidentification_method = self.process_deidentification_method(datadec_dict)
    datadec.subjects_category = self.process_subjects_category(datadec_dict)
    datadec.special_subjects_description = datadec_dict.get('special_subjects_description', None)
    datadec.other_external_id = datadec_dict.get('other_external_id', None)
    datadec.share_category = self.process_access_category(datadec_dict)
    datadec.consent_status = self.process_constent_status(datadec_dict)
    datadec.comments = datadec_dict.get('source_notes', None)
    if 'data_types' in datadec_dict:
        datadec.data_types_received.set(self.process_datatypes(datadec_dict))
    # if 'contract_obj' not in kwargs:
    #     if 'source_collaboration' in datadec_dict:
    #         datadec.contract = self.process_source_contract(dataset, datadec_dict)
    # else:
    #     datadec.contract = kwargs.pop('contract_obj')
    # if datadec.contract:
    #     datadec.partner = datadec.contract.partners.first()
    self.process_use_restrictions(datadec, datadec_dict)
    datadec.dataset = dataset
    datadec.save()
def process_study(self, study_dict):
    try:
        title = study_dict['title']
    except KeyError:
        raise DatasetImportError(data='study without title')
    description = study_dict.get('description', None)
    ethics_approval_exists = study_dict.get('ethics_approval_exists', False)
    ethics_notes = (
        "The submitter confirms that an ethics approval exists for the data "
        "collection, sharing and the purposes for which the data is shared."
    ) if ethics_approval_exists else None
    existing_project = Project.objects.filter(title=title).first()
    if existing_project is not None:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logger.warning(
            "Project with title '{}' already found. It will be imported again with timestamp {}.".format(
                title, timestamp))
        title = title + timestamp
    project = Project.objects.create(
        title=title,
        description=description,
        has_cner=ethics_approval_exists,
        cner_notes=ethics_notes)
    contacts = self.process_external_contacts(study_dict.get('contacts', []))
    if contacts:
        project.contacts.set(contacts)
        project.save()
    # study_types = self.process_studytypes(study_dict)
    # if study_types:
    #     project.study_types.set(study_types)
    #     project.save()
    return project
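# --- Illustrative input (assumption, not part of the importer) --------------
# A hypothetical study entry for process_study; 'contacts' entries are
# interpreted by process_external_contacts, which is not shown here.
EXAMPLE_STUDY_PAYLOAD = {
    'title': 'Demo study',
    'description': 'Longitudinal cohort used for the demo import.',
    'ethics_approval_exists': True,  # drives has_cner and the boilerplate cner_notes
    'contacts': [],
}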
def process_json(self, dataset_dict):
    try:
        title = dataset_dict['name']
    except KeyError:
        raise DatasetImportError(data='dataset without title')
    title = title.strip()
    try:
        dataset = Dataset.objects.get(title=title)
    except Dataset.DoesNotExist:
        dataset = None
    if dataset:
        # NOTE: `title` is already a str in Python 3; encoding it to bytes
        # would render it as b'...' in the log, so it is used directly here.
        self.logger.warning(f"Dataset with title '{title}' already found. It will be updated.")
    else:
        dataset = Dataset.objects.create(title=title)
    if 'project' in dataset_dict and dataset_dict['project']:
        dataset.project = self.process_project(dataset_dict['project'])
    dataset.sensitivity = dataset_dict.get('sensitivity', None)
    local_custodians, local_personnel, external_contacts = self.process_contacts(
        dataset_dict.get("contacts", []))
    if local_custodians:
        dataset.local_custodians.set(local_custodians, clear=True)
    data_locations = self.process_data_locations(dataset, dataset_dict)
    if data_locations:
        dataset.data_locations.set(data_locations, bulk=False)
    # users_with_access = self.process_user_acl(storage_location_dict)
    # if users_with_access:
    #     dl.users_with_access.set(users_with_access, bulk=False)
    # if 'storage_acl_notes' in storage_location_dict:
    #     dl.access_notes = storage_location_dict['storage_acl_notes']
    shares = self.process_transfers(dataset_dict, dataset)
    if shares:
        dataset.shares.set(shares, bulk=False)
    dataset.save()
    dataset.updated = True
    for local_custodian in local_custodians:
        local_custodian.assign_permissions_to_dataset(dataset)
    studies_map = self.process_datadeclarations(dataset_dict, dataset)
    # Must be run after processing data declarations
    self.process_studies(dataset_dict, studies_map)
    # Must be run after processing data declarations
    legal_bases = self.process_legal_bases(dataset_dict, dataset)
    if legal_bases:
        dataset.legal_basis_definitions.set(legal_bases, bulk=False)
    dataset.save()
    return True
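# --- Usage sketch (assumption, not part of the importer) --------------------
# A minimal driver, assuming the methods above live on an importer class that
# exposes process_json (the DatasetsImporter name is referenced above; the
# constructor signature and file layout are assumptions).
import json

def import_datasets_from_file(path):
    importer = DatasetsImporter()
    with open(path, encoding='utf-8') as fh:
        for dataset_dict in json.load(fh):  # assumes a JSON array of dataset entries
            importer.process_json(dataset_dict)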