async def get_external_participant_id_to_internal_sample_id_export(
    project: str,
    export_type: FileExtension,
    flip_columns: bool = False,
    connection: Connection = get_project_readonly_connection,
):
    """
    Get csv / tsv export of external_participant_id to internal_sample_id

    :param flip_columns: Set to True when exporting for seqr
    """
    player = ParticipantLayer(connection)
    # the layer call expects the project ID (connection.project), not the project name
    assert connection.project
    m = await player.get_external_participant_id_to_internal_sample_id_map(
        project=connection.project)

    rows = [[pid, sample_id_format(sid)] for pid, sid in m]
    if flip_columns:
        rows = [r[::-1] for r in rows]

    output = io.StringIO()
    writer = csv.writer(output, delimiter=export_type.get_delimiter())
    writer.writerows(rows)

    ext = export_type.get_extension()
    filename = f'{project}-participant-to-sample-map-{date.today().isoformat()}{ext}'
    return StreamingResponse(
        # wrap in a list so the payload streams as a single chunk,
        # rather than one character at a time
        iter([output.getvalue()]),
        media_type=export_type.get_mime_type(),
        headers={'Content-Disposition': f'filename={filename}'},
    )
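# A minimal sketch of the export_type helpers assumed above (get_delimiter,
# get_extension, get_mime_type). The real FileExtension enum may well differ;
# this only illustrates the CSV/TSV mapping the export endpoints rely on.
from enum import Enum

class FileExtensionSketch(str, Enum):
    """Hypothetical stand-in for the FileExtension enum used by the exports."""

    CSV = 'csv'
    TSV = 'tsv'

    def get_delimiter(self) -> str:
        # comma for CSV, tab for TSV
        return ',' if self is FileExtensionSketch.CSV else '\t'

    def get_extension(self) -> str:
        # '.csv' / '.tsv', appended to the generated filename
        return f'.{self.value}'

    def get_mime_type(self) -> str:
        return 'text/csv' if self is FileExtensionSketch.CSV else 'text/tab-separated-values'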
async def import_individual_metadata_manifest(
    file: UploadFile = File(...),
    delimiter: Optional[str] = None,
    extra_participants_method: ExtraParticipantImporterHandler = ExtraParticipantImporterHandler.FAIL,
    connection: Connection = get_project_write_connection,
):
    """
    Import individual metadata manifest

    :param extra_participants_method: How to handle participants that appear in the
        uploaded file but not in the project, e.g. fail (the default) or add a
        PARTICIPANT entry for them
    """

    delimiter = guess_delimiter_by_filename(file.filename, default_delimiter=delimiter)

    player = ParticipantLayer(connection)
    csvreader = csv.reader(
        codecs.iterdecode(file.file, 'utf-8-sig'), delimiter=delimiter
    )
    headers = next(csvreader)

    await player.generic_individual_metadata_importer(
        headers, list(csvreader), extra_participants_method=extra_participants_method
    )
    return {'success': True}
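# Hedged sketch of the guess_delimiter_by_filename helper used above: infer the
# delimiter from the filename extension and fall back to the caller-supplied
# default. The name and behaviour of the real helper are assumptions here.
from typing import Optional

def guess_delimiter_by_filename_sketch(
    filename: str, default_delimiter: Optional[str] = None
) -> str:
    """Hypothetical helper: ',' for .csv, '\\t' for .tsv, else the default."""
    lowered = filename.lower()
    if lowered.endswith('.csv'):
        return ','
    if lowered.endswith('.tsv'):
        return '\t'
    if default_delimiter:
        # allow an escaped tab to be passed through a query parameter
        return default_delimiter.replace('\\t', '\t')
    raise ValueError(f'Could not guess delimiter for {filename!r}')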
async def get_individual_metadata_template_for_seqr(
    project: str,
    export_type: FileExtension,
    external_participant_ids: Optional[List[str]] = Query(
        default=None),  # type: ignore[assignment]
    # pylint: disable=invalid-name
    replace_with_participant_external_ids: bool = True,
    connection: Connection = get_project_readonly_connection,
):
    """Get individual metadata template for SEQR as a CSV"""
    participant_layer = ParticipantLayer(connection)
    assert connection.project
    rows = await participant_layer.get_seqr_individual_template(
        project=connection.project,
        external_participant_ids=external_participant_ids,
        replace_with_participant_external_ids=replace_with_participant_external_ids,
    )

    output = io.StringIO()
    writer = csv.writer(output, delimiter=export_type.get_delimiter())
    writer.writerows(rows)

    basefn = f'{project}-{date.today().isoformat()}'
    ext = export_type.get_extension()
    return StreamingResponse(
        # wrap in a list so the payload streams as a single chunk,
        # rather than one character at a time
        iter([output.getvalue()]),
        media_type=export_type.get_mime_type(),
        headers={'Content-Disposition': f'filename={basefn}{ext}'},
    )
async def update_many_participant_external_ids(
    internal_to_external_id: Dict[int, str],
    connection: Connection = get_projectless_db_connection,
):
    """Update external_ids of participants by providing an update map"""
    player = ParticipantLayer(connection)
    return await player.update_many_participant_external_ids(
        internal_to_external_id)
async def fill_in_missing_participants(
    connection: Connection = get_project_write_connection,
):
    """
    Create a corresponding participant (if required)
    for each sample within a project, useful for then importing a pedigree
    """
    participant_layer = ParticipantLayer(connection)

    return {'success': await participant_layer.fill_in_missing_participants()}
    async def test_get_participant_by_eid(self):
        """Test to see what's in the database"""
        pl = ParticipantLayer(self.connection)
        ps = await pl.get_participants(project=1,
                                       external_participant_ids=['EX02'])

        self.assertEqual(1, len(ps))

        self.assertEqual('EX02', ps[0].external_id)
        self.assertEqual(2, ps[0].meta['field'])
        self.assertEqual('XY', ps[0].karyotype)
    async def setUp(self) -> None:
        super().setUp()

        pl = ParticipantLayer(self.connection)
        await pl.create_participant(external_id='EX01',
                                    reported_sex=2,
                                    karyotype='XX',
                                    meta={'field': 1})
        await pl.create_participant(external_id='EX02',
                                    reported_sex=1,
                                    karyotype='XY',
                                    meta={'field': 2})
    async def test_get_all_participants(self):
        """Test getting all participants"""
        pl = ParticipantLayer(self.connection)
        ps = await pl.get_participants(project=1)

        self.assertEqual(2, len(ps))

        self.assertEqual('EX01', ps[0].external_id)
        self.assertEqual(1, ps[0].meta['field'])
        self.assertEqual('XX', ps[0].karyotype)

        self.assertEqual('EX02', ps[1].external_id)
async def get_id_map_by_external_ids(
    external_participant_ids: List[str],
    allow_missing: bool = False,
    connection: Connection = get_project_readonly_connection,
):
    """Get ID map of participants, by external_id"""
    player = ParticipantLayer(connection)
    return await player.get_id_map_by_external_ids(
        external_participant_ids,
        allow_missing=allow_missing,
        project=connection.project,
    )
async def get_participants(
    external_participant_ids: Optional[List[str]] = None,
    internal_participant_ids: Optional[List[int]] = None,
    connection: Connection = get_project_readonly_connection,
):
    """Get participants, default ALL participants in project"""
    player = ParticipantLayer(connection)
    return await player.get_participants(
        project=connection.project,
        external_participant_ids=external_participant_ids,
        internal_participant_ids=internal_participant_ids,
    )
async def get_external_participant_id_to_internal_sample_id(
    connection: Connection = get_project_readonly_connection,
):
    """
    Get a map of {external_participant_id} -> {internal_sample_id}
    useful to matching joint-called samples in the matrix table to the participant

    Return a list not dictionary, because dict could lose
    participants with multiple samples.
    """
    player = ParticipantLayer(connection)
    assert connection.project
    m = await player.get_external_participant_id_to_internal_sample_id_map(
        project=connection.project)
    return [[pid, sample_id_format(sid)] for pid, sid in m]
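# Why a list of pairs rather than a dict: a participant with several samples
# would otherwise collapse to a single entry. Small illustration with invented IDs:
pairs = [['EX01', 'CPGAAA'], ['EX01', 'CPGBBB'], ['EX02', 'CPGCCC']]
assert len(pairs) == 3        # both of EX01's samples are preserved
assert len(dict(pairs)) == 2  # a dict silently keeps only the last EX01 entry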
async def update_participant(
    participant_id: int,
    participant: ParticipantUpdateModel,
    connection: Connection = get_projectless_db_connection,
):
    """Update Participant Data"""
    participant_layer = ParticipantLayer(connection)

    return {
        'success': await participant_layer.update_single_participant(
            participant_id=participant_id,
            reported_sex=participant.reported_sex,
            reported_gender=participant.reported_gender,
            karyotype=participant.karyotype,
            meta=participant.meta,
        )
    }
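# Hedged usage sketch: the JSON body a client might send to this route, using
# only the fields the handler reads above (reported_sex, reported_gender,
# karyotype, meta). The route path and HTTP verb are not shown in this snippet.
example_update_body = {
    'reported_sex': 2,
    'reported_gender': 'female',
    'karyotype': 'XX',
    'meta': {'collection-year': '2022'},
}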
    async def test_pedigree_without_family(self):
        """
        Test getting pedigree where participants do not belong to a family
        """
        pl = ParticipantLayer(self.connection)
        fl = FamilyLayer(self.connection)

        await pl.create_participant(
            external_id='EX01',
            reported_sex=1,
        )
        await pl.create_participant(external_id='EX02', reported_sex=None)

        rows = await fl.get_pedigree(
            project=self.connection.project,
            include_participants_not_in_families=True,
            replace_with_participant_external_ids=True,
        )

        by_id = {r['individual_id']: r for r in rows}
        self.assertEqual(2, len(rows))
        self.assertEqual(1, by_id['EX01']['sex'])
        self.assertIsNone(by_id['EX02']['sex'])
async def batch_upsert_participants(
    participants: ParticipantUpsertBody,
    connection: Connection = get_project_write_connection,
) -> Dict[str, Any]:
    """
    Upserts a list of participants with samples and sequences
    Returns the list of internal sample IDs
    """
    # Convert id in samples to int
    for participant in participants.participants:
        for sample in participant.samples:
            if sample.id:
                sample.id = sample_id_transform_to_raw(sample.id)

    external_pids = [p.external_id for p in participants.participants]

    async with connection.connection.transaction():
        # Layer interface
        pt = ParticipantLayer(connection)

        results = await pt.batch_upsert_participants(participants)
        pid_key = dict(zip(results.keys(), external_pids))

        # Map sids back from ints to strs
        outputs: Dict[str, Dict[str, Any]] = {}
        for pid, samples in results.items():
            samples_output: Dict[str, Any] = {}
            for iid, seqs in samples.items():
                data = {'sequences': seqs}
                samples_output[sample_id_format(iid)] = data
            outputs[pid_key[pid]] = {
                'id': pid,
                'external_id': pid_key[pid],
                'samples': samples_output,
            }

        return outputs
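# Hedged illustration of the shape batch_upsert_participants returns, keyed by
# external participant ID. The concrete IDs and sequence payloads are invented.
example_outputs = {
    'Demeter': {
        'id': 1,                        # internal participant ID
        'external_id': 'Demeter',
        'samples': {
            'CPGAAA': {                 # formatted internal sample ID
                'sequences': [10, 11],  # whatever the layer returned per sample
            },
        },
    },
}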
    async def test_insert_participants(self):
        """
        Test inserting participants, samples and sequences, and make sure they're correctly linked.

        Tests the other side of:
            tests.test_parse_generic_metadata:TestParseGenericMetadata.test_rows_with_participants
        """
        all_participants = [
            ParticipantUpsert.construct(
                **{
                    'external_id': 'Demeter',
                    'meta': {},
                    'samples': [
                        SampleUpsert.construct(
                            **{
                                'external_id': 'sample_id001',
                                'meta': {},
                                'sequences': [
                                    SequenceUpsert.construct(
                                        **{
                                            'meta': {
                                                'reads': [
                                                    [
                                                        {
                                                            'basename': 'sample_id001.filename-R1.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id001.filename-R1.fastq.gz',
                                                            'size': 111,
                                                        },
                                                        {
                                                            'basename': 'sample_id001.filename-R2.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id001.filename-R2.fastq.gz',
                                                            'size': 111,
                                                        },
                                                    ]
                                                ],
                                                'reads_type': 'fastq',
                                            },
                                            'status': SequenceStatus('uploaded'),
                                            'type': SequenceType('genome'),
                                        }
                                    ),
                                    SequenceUpsert.construct(
                                        **{
                                            'meta': {
                                                'reads': [
                                                    [
                                                        {
                                                            'basename': 'sample_id001.exome.filename-R1.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id001.exome.filename-R1.fastq.gz',
                                                            'size': 111,
                                                        },
                                                        {
                                                            'basename': 'sample_id001.exome.filename-R2.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id001.exome.filename-R2.fastq.gz',
                                                            'size': 111,
                                                        },
                                                    ]
                                                ],
                                                'reads_type': 'fastq',
                                            },
                                            'status': SequenceStatus('uploaded'),
                                            'type': SequenceType('exome'),
                                        }
                                    ),
                                ],
                                'type': SampleType('blood'),
                            }
                        )
                    ],
                }
            ),
            ParticipantUpsert.construct(
                **{
                    'external_id': 'Apollo',
                    'meta': {},
                    'samples': [
                        SampleUpsert.construct(
                            **{
                                'external_id': 'sample_id002',
                                'meta': {},
                                'sequences': [
                                    SequenceUpsert.construct(
                                        **{
                                            'meta': {
                                                'reads': [
                                                    [
                                                        {
                                                            'basename': 'sample_id002.filename-R1.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id002.filename-R1.fastq.gz',
                                                            'size': 111,
                                                        },
                                                        {
                                                            'basename': 'sample_id002.filename-R2.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id002.filename-R2.fastq.gz',
                                                            'size': 111,
                                                        },
                                                    ]
                                                ],
                                                'reads_type': 'fastq',
                                            },
                                            'status': SequenceStatus('uploaded'),
                                            'type': SequenceType('genome'),
                                        }
                                    )
                                ],
                                'type': SampleType('blood'),
                            }
                        ),
                        SampleUpsert.construct(
                            **{
                                'external_id': 'sample_id004',
                                'meta': {},
                                'sequences': [
                                    SequenceUpsert.construct(
                                        **{
                                            'meta': {
                                                'reads': [
                                                    [
                                                        {
                                                            'basename': 'sample_id004.filename-R1.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id004.filename-R1.fastq.gz',
                                                            'size': 111,
                                                        },
                                                        {
                                                            'basename': 'sample_id004.filename-R2.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id004.filename-R2.fastq.gz',
                                                            'size': 111,
                                                        },
                                                    ]
                                                ],
                                                'reads_type': 'fastq',
                                            },
                                            'status': SequenceStatus('uploaded'),
                                            'type': SequenceType('genome'),
                                        }
                                    )
                                ],
                                'type': SampleType('blood'),
                            }
                        ),
                    ],
                }
            ),
            ParticipantUpsert.construct(
                **{
                    'external_id': 'Athena',
                    'meta': {},
                    'samples': [
                        SampleUpsert.construct(
                            **{
                                'external_id': 'sample_id003',
                                'meta': {},
                                'sequences': [
                                    SequenceUpsert.construct(
                                        **{
                                            'meta': {
                                                'reads': [
                                                    [
                                                        {
                                                            'basename': 'sample_id003.filename-R1.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id003.filename-R1.fastq.gz',
                                                            'size': 111,
                                                        },
                                                        {
                                                            'basename': 'sample_id003.filename-R2.fastq.gz',
                                                            'checksum': None,
                                                            'class': 'File',
                                                            'location': '/path/to/sample_id003.filename-R2.fastq.gz',
                                                            'size': 111,
                                                        },
                                                    ]
                                                ],
                                                'reads_type': 'fastq',
                                            },
                                            'status': SequenceStatus('uploaded'),
                                            'type': SequenceType('genome'),
                                        }
                                    )
                                ],
                                'type': SampleType('blood'),
                            }
                        )
                    ],
                }
            ),
        ]

        body = ParticipantUpsertBody.construct(participants=all_participants)
        # Layer interface
        pt = ParticipantLayer(self.connection)

        await pt.batch_upsert_participants(body)

        expected_sample_eid_to_participant_eid = {
            sample.external_id: participant.external_id
            for participant in all_participants
            for sample in participant.samples
        }

        db_participants = await self.connection.connection.fetch_all(
            'SELECT * FROM participant'
        )
        self.assertEqual(3, len(db_participants))
        self.assertEqual('Demeter', db_participants[0]['external_id'])
        self.assertEqual('Apollo', db_participants[1]['external_id'])
        self.assertEqual('Athena', db_participants[2]['external_id'])

        participant_id_map = {p['external_id']: p['id'] for p in db_participants}

        db_samples = await self.connection.connection.fetch_all('SELECT * FROM sample')
        self.assertEqual(4, len(db_samples))
        for db_sample in db_samples:
            self.assertIsNotNone(db_sample['external_id'])
            self.assertIsNotNone(db_sample['participant_id'])
            # get expected_participant_id from the db_sample external_id
            expected_participant_eid = expected_sample_eid_to_participant_eid.get(
                db_sample['external_id']
            )
            self.assertEqual(
                participant_id_map[expected_participant_eid],
                db_sample['participant_id'],
            )
    async def import_pedigree(
        self,
        header: Optional[List[str]],
        rows: List[List[str]],
        create_missing_participants=False,
        perform_sex_check=True,
    ):
        """
        Import pedigree file
        """
        if header is None:
            _header = PedRow.default_header()
        else:
            _header = PedRow.parse_header_order(header)

        if len(rows) == 0:
            return None

        max_row_length = max(len(r) for r in rows)
        if max_row_length > len(_header):
            raise ValueError(
                f"The parsed header {_header} isn't long enough "
                f'to cover the longest row ({len(_header)} < {max_row_length})')
        if len(_header) > max_row_length:
            _header = _header[:max_row_length]

        pedrows: List[PedRow] = [
            PedRow(**{_header[i]: r[i] for i in range(len(_header))})
            for r in rows
        ]
        # this validates a lot of the pedigree too
        pedrows = PedRow.order(pedrows)
        if perform_sex_check:
            PedRow.validate_sexes(pedrows, throws=True)

        external_family_ids = set(r.family_id for r in pedrows)
        # get set of all individual, paternal, maternal participant ids
        external_participant_ids = set(
            pid for r in pedrows
            for pid in [r.individual_id, r.paternal_id, r.maternal_id] if pid)

        participant_table = ParticipantLayer(self.connection)

        external_family_id_map = await self.ftable.get_id_map_by_external_ids(
            list(external_family_ids),
            project=self.connection.project,
            allow_missing=True,
        )
        missing_external_family_ids = [
            f for f in external_family_ids if f not in external_family_id_map
        ]
        external_participant_ids_map = await participant_table.get_id_map_by_external_ids(
            list(external_participant_ids),
            project=self.connection.project,
            # Allow missing participants if we're creating them
            allow_missing=create_missing_participants,
        )

        async with self.connection.connection.transaction():

            if create_missing_participants:
                missing_participant_ids = set(external_participant_ids) - set(
                    external_participant_ids_map
                )
                for row in pedrows:
                    if row.individual_id not in missing_participant_ids:
                        continue
                    external_participant_ids_map[row.individual_id] = (
                        await participant_table.create_participant(
                            external_id=row.individual_id,
                            reported_sex=row.sex,
                        )
                    )

            for external_family_id in missing_external_family_ids:
                internal_family_id = await self.ftable.create_family(
                    external_id=external_family_id,
                    description=None,
                    coded_phenotype=None,
                )
                external_family_id_map[external_family_id] = internal_family_id

            # build the family-participant rows, mapping external IDs back to internal IDs

            insertable_rows = [
                {
                    'family_id': external_family_id_map[row.family_id],
                    'participant_id': external_participant_ids_map[row.individual_id],
                    'paternal_participant_id': external_participant_ids_map.get(row.paternal_id),
                    'maternal_participant_id': external_participant_ids_map.get(row.maternal_id),
                    'affected': row.affected,
                    'notes': row.notes,
                }
                for row in pedrows
            ]

            await participant_table.update_participants(
                participant_ids=[
                    external_participant_ids_map[row.individual_id]
                    for row in pedrows
                ],
                reported_sexes=[row.sex for row in pedrows],
            )
            await self.fptable.create_rows(insertable_rows)

        return True
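# Hedged usage sketch for import_pedigree, assuming PedRow.default_header()
# follows the conventional PED column order
# (family_id, individual_id, paternal_id, maternal_id, sex, affected).
example_ped_rows = [
    ['FAM01', 'EX01', '', '', '1', '1'],          # unaffected founder father
    ['FAM01', 'EX02', '', '', '2', '1'],          # unaffected founder mother
    ['FAM01', 'EX03', 'EX01', 'EX02', '1', '2'],  # affected child
]
# await family_layer.import_pedigree(
#     header=None,                       # fall back to PedRow.default_header()
#     rows=example_ped_rows,
#     create_missing_participants=True,  # create participants for unseen IDs
# )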