def _get_samples(self):
    """Build one "sample" archive entity per biomaterial in the manifest.

    Each entity carries the biomaterial data together with the manifest's
    project. Biomaterials that were derived by a process also get a
    ``sampleRelationships`` link for every biomaterial they were derived
    from, and each such relationship is recorded as an edge (source sample
    id -> derived sample id) in a graph.

    Returns:
        list: The sample entities, each exactly once. Entities whose id is
        returned by the graph's ``topological_sort()`` come first, in that
        order (presumably sources before the samples derived from them --
        this depends on ``Graph.topological_sort``); the remaining entities
        follow in the order they were created.
    """
    samples_map = {}
    derived_from_graph = Graph()
    project = self.manifest.get_project()

    for biomaterial in self.manifest.get_biomaterials():
        archive_entity = ArchiveEntity()
        archive_entity.manifest_id = self.manifest.manifest_id
        archive_type = "sample"
        archive_entity.archive_entity_type = archive_type
        archive_entity.id = self.generate_archive_entity_id(
            archive_type, biomaterial.data)
        archive_entity.data = {
            'biomaterial': biomaterial.data,
            'project': project
        }
        archive_entity.metadata_uuids = [
            biomaterial.data['uuid']['uuid'],
            project['uuid']['uuid']
        ]
        archive_entity.accessioned_metadata_uuids = [
            biomaterial.data['uuid']['uuid']
        ]

        if biomaterial.derived_by_process:
            # TODO protocols will be needed for samples conversion
            # archive_entity.data.update(biomaterial.derived_with_protocols)
            sample_links = []
            for derived_from in biomaterial.derived_from_biomaterials:
                derived_from_alias = self.generate_archive_entity_id(
                    'sample', derived_from)
                derived_from_graph.add_edge(derived_from_alias,
                                            archive_entity.id)
                sample_links.append({
                    'alias': derived_from_alias,
                    'relationshipNature': 'derived from'
                })
            archive_entity.links = {'sampleRelationships': sample_links}

        samples_map[archive_entity.id] = archive_entity

    # Entities that take part in a "derived from" relationship, in graph
    # order. The graph may contain ids with no entity in this manifest;
    # those are skipped.
    priority_samples = []
    priority_ids = set()
    for sample_id in derived_from_graph.topological_sort():
        entity = samples_map.get(sample_id)
        if entity:
            priority_samples.append(entity)
            priority_ids.add(sample_id)

    # Everything else. The membership test must be done on ids: comparing
    # an id against the list of entity objects never matches, which would
    # make every prioritised sample appear a second time in the result.
    orphan_samples = [
        entity for sample_id, entity in samples_map.items()
        if sample_id not in priority_ids
    ]
    return priority_samples + orphan_samples
def _get_sequencing_experiments(self):
    """Build the "sequencingExperiment" archive entity for the manifest.

    Returns:
        list: An empty list when the manifest has no assay process;
        otherwise a single-element list holding the entity. The entity's
        data bundles the assay process, the library preparation and
        sequencing protocols and the input biomaterial, and its links
        reference the study alias and the input sample alias.
    """
    assay_process = self.manifest.get_assay_process()
    if not assay_process:
        return []

    biomaterial = self.manifest.get_input_biomaterial()

    entity = ArchiveEntity()
    entity.manifest_id = self.manifest.manifest_id
    entity_type = "sequencingExperiment"
    entity.archive_entity_type = entity_type
    entity.id = self.generate_archive_entity_id(entity_type, assay_process)

    library_protocol = self.manifest.get_library_preparation_protocol()
    sequencing_protocol = self.manifest.get_sequencing_protocol()

    entity.data = {
        'process': assay_process,
        'library_preparation_protocol': library_protocol,
        'sequencing_protocol': sequencing_protocol,
        'input_biomaterial': biomaterial,
    }

    # Every metadata document that contributes to this entity.
    contributing_documents = (
        library_protocol,
        sequencing_protocol,
        biomaterial,
        assay_process,
    )
    entity.metadata_uuids = [
        document['uuid']['uuid'] for document in contributing_documents
    ]
    # Only the process is listed as accessioned for an experiment.
    entity.accessioned_metadata_uuids = [assay_process['uuid']['uuid']]

    study_alias = self.generate_archive_entity_id(
        'study', self.manifest.get_project())
    sample_alias = self.generate_archive_entity_id('sample', biomaterial)
    entity.links = {
        'studyRef': {"alias": study_alias},
        'sampleUses': [{'sampleRef': {"alias": sample_alias}}],
    }

    return [entity]
def _get_projects(self):
    """Build the "project" archive entity for the manifest's project.

    Returns:
        list: An empty list when the manifest has no project; otherwise a
        single-element list holding the project entity.
    """
    project_document = self.manifest.get_project()
    if not project_document:
        return []

    entity = ArchiveEntity()
    entity_type = "project"
    entity.archive_entity_type = entity_type
    entity.id = self.generate_archive_entity_id(entity_type, project_document)
    entity.data = {"project": project_document}

    # The project is both the only source document and the accessioned one.
    project_uuid = project_document['uuid']['uuid']
    entity.metadata_uuids = [project_uuid]
    entity.accessioned_metadata_uuids = [project_uuid]

    entity.manifest_id = self.manifest.manifest_id
    return [entity]
def _get_studies(self):
    """Build the "study" archive entity for the manifest's project.

    Returns:
        list: An empty list when the manifest has no project; otherwise a
        single-element list holding the study entity, linked to the
        project entity through a ``projectRef`` alias.
    """
    project_document = self.manifest.get_project()
    if not project_document:
        return []

    entity = ArchiveEntity()
    entity.manifest_id = self.manifest.manifest_id
    entity_type = "study"
    entity.archive_entity_type = entity_type
    entity.id = self.generate_archive_entity_id(entity_type, project_document)
    entity.data = {"project": project_document}

    # A study is generated from the project document alone.
    project_uuid = project_document['uuid']['uuid']
    entity.metadata_uuids = [project_uuid]
    entity.accessioned_metadata_uuids = [project_uuid]

    project_alias = self.generate_archive_entity_id(
        'project', project_document)
    entity.links = {"projectRef": {"alias": project_alias}}
    return [entity]
def _get_sequencing_runs(self):
    """Build one "sequencingRun" archive entity per lane of files.

    The manifest's files are grouped by ``content.lane_index`` (files
    without one fall into lane 1). Each group becomes a run entity linked
    to the sequencing experiment of the assay process. When there is more
    than one lane, the lane index is stored in the entity data and
    appended to the entity id.

    Returns:
        list: The run entities, in order of first appearance of each lane
        among the files; empty when the manifest has no files.
    """
    assay_process = self.manifest.get_assay_process()
    library_protocol = self.manifest.get_library_preparation_protocol()

    # Group the files by lane index.
    files_by_lane = {}
    for file_document in self.manifest.get_files():
        lane = file_document.get('content').get('lane_index', 1)
        files_by_lane.setdefault(lane, []).append(file_document)

    has_multiple_lanes = len(files_by_lane) > 1

    run_entities = []
    for lane_index, lane_files in files_by_lane.items():
        entity = ArchiveEntity()
        entity.manifest_id = self.manifest.manifest_id
        entity_type = "sequencingRun"
        entity.archive_entity_type = entity_type
        entity.id = self.generate_archive_entity_id(
            entity_type, assay_process)
        entity.data = {
            'library_preparation_protocol': library_protocol,
            'process': assay_process,
            'files': lane_files,
            'manifest_id': entity.manifest_id,
        }

        protocol_uuid = library_protocol['uuid']['uuid']
        process_uuid = assay_process['uuid']['uuid']
        file_uuids = [
            lane_file['uuid']['uuid'] for lane_file in lane_files
        ]
        entity.metadata_uuids = [protocol_uuid, process_uuid, *file_uuids]
        # Only the files are listed as accessioned for a run.
        entity.accessioned_metadata_uuids = file_uuids

        experiment_alias = self.generate_archive_entity_id(
            'sequencingExperiment', assay_process)
        entity.links = {'assayRefs': [{"alias": experiment_alias}]}

        if has_multiple_lanes:
            # All lanes share the same process-based id, so the lane index
            # is appended to tell the runs apart.
            entity.data['lane_index'] = lane_index
            entity.id = f'{entity.id}_{lane_index}'

        run_entities.append(entity)

    return run_entities