def test_sheet_germline_inconsistent_pedigree(
    tsv_sheet_germline_inconsistent_pedigree,
    tsv_sheet_germline_trio_plus,
):
    """Check that conflicting join-field and row pedigree information is rejected."""
    # Sanity check: the consistent trio-plus sheet can be joined by `familyId`.
    consistent_sheet = io_tsv.read_germline_tsv_sheet(tsv_sheet_germline_trio_plus)
    shortcuts.GermlineCaseSheet(sheet=consistent_sheet, join_by_field='familyId')

    # In the inconsistent sheet every pedigree member carries its own
    # `familyId` instead of a shared one, so building the case sheet must fail.
    with pytest.raises(InconsistentPedigreeException):
        inconsistent_sheet = io_tsv.read_germline_tsv_sheet(
            tsv_sheet_germline_inconsistent_pedigree
        )
        shortcuts.GermlineCaseSheet(sheet=inconsistent_sheet, join_by_field='familyId')
def yield_ngs_library_names(sheet, min_batch=None, batch_key="batchNo", pedigree_field=None):
    """Yield the DNA NGS library name of each pedigree's index donor.

    A pedigree is considered when ``min_batch`` is ``None`` or when the highest
    ``batch_key`` value among its donors (donors without the key count as 0)
    is at least ``min_batch``.  Pedigrees whose index has no DNA NGS library
    are skipped.

    :param sheet: Sample sheet.
    :type sheet: biomedsheets.models.Sheet

    :param min_batch: Minimum batch number to be extracted from the sheet.
        Pedigrees whose highest batch number is below this value are skipped.
    :type min_batch: int

    :param batch_key: Batch number key in sheet. Default: 'batchNo'.
    :type batch_key: str

    :param pedigree_field: Field that should be used to define a pedigree.
        When not given, ``GermlineCaseSheet`` is built without ``join_by_field``.
    """
    # Only forward `join_by_field` when a pedigree field was actually requested.
    join_kwargs = {"join_by_field": pedigree_field} if pedigree_field else {}
    case_sheet = shortcuts.GermlineCaseSheet(sheet, **join_kwargs)

    for pedigree in case_sheet.cohort.pedigrees:
        max_batch = max(member.extra_infos.get(batch_key, 0) for member in pedigree.donors)
        batch_accepted = min_batch is None or min_batch <= max_batch
        if not batch_accepted:
            continue
        index_library = pedigree.index.dna_ngs_library
        if index_library:
            yield index_library.name
def __init__(self, args):
    """Keep the arguments, load the sheets and wrap each one as a germline case sheet."""
    #: Command line arguments.
    self.args = args
    #: Raw sample sheets, as returned by ``load_sheets_tsv()``.
    self.sheets = load_sheets_tsv(self.args)
    #: Shortcut sample sheets, one ``GermlineCaseSheet`` per raw sheet.
    self.shortcut_sheets = list(map(shortcuts.GermlineCaseSheet, self.sheets))
def yield_ngs_library_names(
    self, sheet, min_batch=None, max_batch=None, batch_key="batchNo", family_key="familyId"
):
    """Yield index only NGS library names from sheet.

    The batch of each pedigree's index donor is obtained from
    ``self._batch_of()``.  When ``min_batch`` is given, donors whose batch is
    below ``min_batch`` are skipped; when ``max_batch`` is given, donors whose
    batch is above ``max_batch`` are skipped.  Either bound may be used on its
    own.

    This function can be overloaded, for example to only consider the indexes.

    :param sheet: Sample sheet.
    :type sheet: biomedsheets.models.Sheet

    :param min_batch: Minimum batch number to be extracted from the sheet.
        All samples in batches below this value will be skipped.
    :type min_batch: int

    :param max_batch: Maximum batch number to be extracted from the sheet.
        All samples in batches above this value will be skipped.
    :type max_batch: int

    :param batch_key: Batch number key in sheet. Default: 'batchNo'.
    :type batch_key: str

    :param family_key: Family identifier key. Default: 'familyId'.
    :type family_key: str
    """
    family_max_batch = self._build_family_max_batch(sheet, batch_key, family_key)
    # The batch lookup is needed as soon as *either* bound is set.  Previously
    # it only happened for `min_batch`, so passing `max_batch` alone failed
    # with an unbound `batch` variable.
    filter_by_batch = min_batch is not None or max_batch is not None

    shortcut_sheet = shortcuts.GermlineCaseSheet(sheet)
    for pedigree in shortcut_sheet.cohort.pedigrees:
        donor = pedigree.index
        if filter_by_batch:
            batch = self._batch_of(donor, family_max_batch, batch_key, family_key)
            # Log the value that was actually compared; indexing
            # `donor.extra_infos[batch_key]` here could raise `KeyError` for
            # donors without that entry.
            if min_batch is not None and batch < min_batch:
                logger.debug(
                    "Skipping donor %s because %s = %d < min_batch = %d",
                    donor.name,
                    batch_key,
                    batch,
                    min_batch,
                )
                continue
            if max_batch is not None and batch > max_batch:
                logger.debug(
                    "Skipping donor %s because %s = %d > max_batch = %d",
                    donor.name,
                    batch_key,
                    batch,
                    max_batch,
                )
                continue
        logger.debug("Processing NGS library for donor %s", donor.name)
        yield donor.dna_ngs_library.name
def yield_ngs_library_names(sheet, min_batch=None, batch_key="batchNo"):
    """Yield the DNA NGS library name of each pedigree's index donor.

    When ``min_batch`` is given, index donors that have a ``batch_key`` entry
    in ``extra_infos`` with a value below ``min_batch`` are skipped.  Donors
    without a ``batch_key`` entry are never skipped.

    :param sheet: Sample sheet.
    :type sheet: biomedsheets.models.Sheet

    :param min_batch: Minimum batch number; lower batches are skipped.
    :type min_batch: int

    :param batch_key: Batch number key in sheet. Default: 'batchNo'.
    :type batch_key: str
    """
    shortcut_sheet = shortcuts.GermlineCaseSheet(sheet)
    for pedigree in shortcut_sheet.cohort.pedigrees:
        donor = pedigree.index
        if min_batch is not None and batch_key in donor.extra_infos:
            if min_batch > donor.extra_infos[batch_key]:
                # Argument order must follow the placeholders: the key name
                # fills `%s` and its value fills `%d`.
                logger.debug(
                    "Skipping donor %s because %s = %d < min_batch = %d",
                    donor.name,
                    batch_key,
                    donor.extra_infos[batch_key],
                    min_batch,
                )
                continue
        yield donor.dna_ngs_library.name
def test_load_sheet_tsv():
    """Check that ``load_sheet_tsv()`` returns a ``Sheet`` with the expected index libraries."""
    # Input: the germline example sheet stored next to this test module.
    data_dir = pathlib.Path(__file__).resolve().parent / "data"
    sheet = load_sheet_tsv(path_tsv=data_dir / "germline_sheet.tsv")
    assert isinstance(sheet, models.Sheet)

    # Library names that an index donor is allowed to have.
    known_library_names = [
        "P001-N1-DNA1-WGS1",
        "P004-N1-DNA1-WGS1",
        "P007-N1-DNA1-WGS1",
    ]

    # Wrap the sheet so the pedigrees can be walked directly.
    case_sheet = shortcuts.GermlineCaseSheet(sheet)
    for pedigree in case_sheet.cohort.pedigrees:
        assert pedigree.index.dna_ngs_library.name in known_library_names
def yield_ngs_library_names(
    self, sheet, min_batch=None, batch_key="batchNo", family_key="familyId"
):
    """Yield the DNA NGS library name of each pedigree's index donor.

    When ``min_batch`` is given, the index donor's batch is obtained from
    ``self._batch_of()`` and donors whose batch is below ``min_batch`` are
    skipped.

    :param sheet: Sample sheet.
    :type sheet: biomedsheets.models.Sheet

    :param min_batch: Minimum batch number; lower batches are skipped.
    :type min_batch: int

    :param batch_key: Batch number key in sheet. Default: 'batchNo'.
    :type batch_key: str

    :param family_key: Family identifier key. Default: 'familyId'.
    :type family_key: str
    """
    # NOTE(review): presumably maps each family to its highest batch number;
    # confirm against `_build_family_max_batch`.
    family_max_batch = self._build_family_max_batch(sheet, batch_key, family_key)

    shortcut_sheet = shortcuts.GermlineCaseSheet(sheet)
    for pedigree in shortcut_sheet.cohort.pedigrees:
        donor = pedigree.index
        if min_batch is not None:
            batch = self._batch_of(donor, family_max_batch, batch_key, family_key)
            if batch < min_batch:
                # Log the value that was actually compared; indexing
                # `donor.extra_infos[batch_key]` here could raise `KeyError`
                # for donors without that entry.
                logger.debug(
                    "Skipping donor %s because %s = %d < min_batch = %d",
                    donor.name,
                    batch_key,
                    batch,
                    min_batch,
                )
                continue
        yield donor.dna_ngs_library.name
def sheet_germline(tsv_sheet_germline):
    """Return the ``GermlineCaseSheet`` built from the germline example sheet."""
    parsed_sheet = io_tsv.read_germline_tsv_sheet(tsv_sheet_germline)
    return shortcuts.GermlineCaseSheet(parsed_sheet)
def test_sheet_germline_trio_plus_exception(tsv_sheet_germline_trio_plus):
    """Check that joining by an unknown field raises ``UndefinedFieldException``."""
    with pytest.raises(UndefinedFieldException):
        trio_plus_sheet = io_tsv.read_germline_tsv_sheet(tsv_sheet_germline_trio_plus)
        shortcuts.GermlineCaseSheet(sheet=trio_plus_sheet, join_by_field='undefined_field')
def sheet_germline_only_parent_samples(tsv_sheet_germline_only_parent_samples):
    """Return the ``GermlineCaseSheet`` for the example where only parents have samples.

    The TSV is read with the ``NAMING_ONLY_SECONDARY_ID`` naming scheme.
    """
    parsed_sheet = io_tsv.read_germline_tsv_sheet(
        tsv_sheet_germline_only_parent_samples,
        naming_scheme=naming.NAMING_ONLY_SECONDARY_ID,
    )
    return shortcuts.GermlineCaseSheet(parsed_sheet)
def sheet_germline_two_test_samples(tsv_sheet_germline_two_test_samples):
    """Return the ``GermlineCaseSheet`` for the germline example with two test samples.

    The TSV is read with the ``NAMING_ONLY_SECONDARY_ID`` naming scheme.
    """
    parsed_sheet = io_tsv.read_germline_tsv_sheet(
        tsv_sheet_germline_two_test_samples,
        naming_scheme=naming.NAMING_ONLY_SECONDARY_ID,
    )
    return shortcuts.GermlineCaseSheet(parsed_sheet)
def sheet_germline_multiple_trio_plus(tsv_sheet_germline_multiple_trio_plus):
    """Return the ``GermlineCaseSheet`` for the multiple trio plus example.

    Pedigrees are joined by the ``familyId`` field.
    """
    parsed_sheet = io_tsv.read_germline_tsv_sheet(tsv_sheet_germline_multiple_trio_plus)
    return shortcuts.GermlineCaseSheet(sheet=parsed_sheet, join_by_field='familyId')