def files_data(files_raw):
    """Parse every raw file payload with its dedicated parser.

    Args:
        files_raw (dict): Raw content keyed by 'config', 'sampleinfo',
            'qcmetrics' and 'sacct'.

    Returns:
        dict: Parsed content stored under the same keys.
    """
    parsed_config = files_api.parse_config(files_raw['config'])
    parsed_sampleinfo = files_api.parse_sampleinfo(files_raw['sampleinfo'])
    parsed_qcmetrics = files_api.parse_qcmetrics(files_raw['qcmetrics'])
    parsed_sacct = sacct.parse_sacct(files_raw['sacct'])
    return {
        'config': parsed_config,
        'sampleinfo': parsed_sampleinfo,
        'qcmetrics': parsed_qcmetrics,
        'sacct': parsed_sacct,
    }
def test_parse_qcmetrics(files_raw):
    """Check the parsed QC metrics of the 'mother' sample.

    Args:
        files_raw (dict): With dicts from files
    """
    # GIVEN qc metrics input from an analysis
    qcmetrics_raw = files_raw['qcmetrics']

    # WHEN parsing it
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # THEN it should return a dict
    assert isinstance(qcmetrics_data, dict)

    # Expected per-sample values
    qcmetrics_test_sample_data = {
        'at_dropout': '1.716704',
        'duplicates': 0.0379523291229131,
        'id': 'mother',
        'insert_size_standard_deviation': 94.353778,
        'gc_dropout': '0.214813',
        'mapped': 0.9974176575073073,
        'median_insert_size': '409',
        'plink_sex': 'female',
        'predicted_sex': 'female',
        'reads': 600006004,
        'strand_balance': 0.50162,
        'target_coverage': 28.643247,
    }

    # Expected completeness per coverage threshold
    qcmetrics_test_sample_cov_data = {
        10: '0.98974',
        20: '0.935455',
        50: '0.002685',
        100: '0.000101',
    }

    expected_id = qcmetrics_test_sample_data['id']
    matching_samples = [
        sample_data
        for sample_data in qcmetrics_data['samples']
        if sample_data['id'] == expected_id
    ]
    # Without this guard every assertion below is skipped, and the test
    # passes, when the parser returns no sample with the expected id.
    assert matching_samples, f"no sample with id {expected_id!r} in parsed qc metrics"

    # THEN the sample values should match
    for key, value in qcmetrics_test_sample_data.items():
        for sample_data in matching_samples:
            assert sample_data[key] == value

    # THEN the sample coverage completeness should match
    for key, value in qcmetrics_test_sample_cov_data.items():
        for sample_data in matching_samples:
            assert sample_data['completeness_target'][key] == value
def files_data(files_raw):
    """Return the raw files run through their respective parsers.

    Args:
        files_raw (dict): Raw content keyed by file kind.

    Returns:
        dict: Parsed content stored under the same keys.
    """
    parsed = {}
    parsed["config"] = mip_dna_files_api.parse_config(files_raw["config"])
    parsed["sampleinfo"] = mip_dna_files_api.parse_sampleinfo(files_raw["sampleinfo"])
    parsed["qcmetrics"] = mip_dna_files_api.parse_qcmetrics(files_raw["qcmetrics"])
    parsed["rna_config"] = mip_dna_files_api.parse_config(files_raw["rna_config"])
    parsed["rna_sampleinfo"] = mip_rna_files_api.parse_sampleinfo_rna(
        files_raw["rna_sampleinfo"]
    )
    parsed["rna_config_store"] = store_mip.parse_config(files_raw["rna_config_store"])
    parsed["rna_sampleinfo_store"] = store_mip.parse_sampleinfo(
        files_raw["rna_sampleinfo_store"]
    )
    return parsed
def parse_qcmetrics(data: dict) -> dict:
    """Delegate QC metrics parsing to ``files.parse_qcmetrics``.

    Args:
        data: Raw QC metrics content.

    Returns:
        Whatever ``files.parse_qcmetrics`` returns for ``data``.
    """
    parsed_metrics = files.parse_qcmetrics(data)
    return parsed_metrics
def test_parse_qcmetrics(files_raw):
    """Check versions, sample values and coverage completeness in parsed QC metrics.

    Args:
        files_raw (dict): With dicts from files
    """
    # GIVEN qc metrics input from an analysis
    raw_metrics = files_raw['qcmetrics']

    # WHEN parsing it
    qcmetrics_data = files.parse_qcmetrics(raw_metrics)

    # THEN the result is a dict
    assert isinstance(qcmetrics_data, dict)

    ## Version data
    expected_versions = {
        'freebayes': 'v1.0.2',
        'gatk': 3.6,
        'manta': '1.0.3',
        'bcftools': '1.3.1+htslib-1.3.1',
        'vep': 'v87',
    }
    for program, version in expected_versions.items():
        assert qcmetrics_data['versions'][program] == version

    ## Sample data
    expected_sample = {
        'at_dropout': 0.126963,
        'duplicates': 0.132719986683768,
        'id': 'sample',
        'insert_size_standard_deviation': 89.871783,
        'gc_dropout': 3.956909,
        'mapped': 0.9850732625744484,
        'median_insert_size': 413,
        'plink_sex': 'female',
        'predicted_sex': 'female',
        'reads': 949878168,
        'strand_balance': 0.499558,
        'target_coverage': 37.428611,
    }
    parsed_samples = qcmetrics_data['samples']
    # Every parsed sample is compared against the same expected values
    for field, expected in expected_sample.items():
        for parsed_sample in parsed_samples:
            assert parsed_sample[field] == expected

    ## Sample coverage data
    expected_completeness = {
        10: 0.991187,
        20: 0.984713,
        50: 0.063229,
        100: 0.000372,
    }
    for threshold, expected in expected_completeness.items():
        for parsed_sample in parsed_samples:
            assert parsed_sample['completeness_target'][threshold] == expected
def check(context: click.Context, family: str):
    """Print a table of sex calls and duplicate rates for a family's samples.

    Looks up the first analysis the store returns for ``family``, loads its
    config, sample info and QC metrics YAML files, and prints one table with
    the columns: sample, type, ped, chanjo, peddy, plink, duplicates.

    Args:
        context: Click context; ``context.obj['store']`` is queried for
            the analysis.
        family: Family identifier passed to the store query.

    The command aborts through ``context.abort()`` when no analysis is
    found, or when the analysis config or the QC metrics file is missing.
    A missing peddy file only logs a warning and leaves that column empty.
    """
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()

    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    # Context managers close each YAML handle as soon as it has been parsed.
    with config_path.open() as config_handle:
        config_raw = ruamel.yaml.safe_load(config_handle)
    config_data = files.parse_config(config_raw)

    with Path(config_data['sampleinfo_path']).open() as sampleinfo_handle:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_handle)
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)

    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    with qcmetrics_path.open() as qcmetrics_handle:
        qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_handle)
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # One list per table column.
    # NOTE(review): the columns are filled from three separate sample lists,
    # so rows only line up if all three share the same sample order - confirm.
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }

    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])

    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])

        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)

        predicted_sex = sexcheck_data['predicted_sex']
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")

    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")

    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)

        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")

    print(tabulate(samples, headers='keys', tablefmt='psql'))
def _parse_qc_metric_file_into_dict(qcmetrics_raw):
    """Return the result of ``files.parse_qcmetrics`` for the raw QC metrics."""
    return files.parse_qcmetrics(qcmetrics_raw)
def check(context: click.Context, family: str):
    """Print a table of sex calls and duplicate rates for a family's samples.

    Looks up the first analysis the store returns for ``family``, loads its
    config, sample info and QC metrics YAML files, and prints one table with
    the columns: sample, type, ped, chanjo, peddy, plink, duplicates.

    Args:
        context: Click context; ``context.obj['store']`` is queried for
            the analysis.
        family: Family identifier passed to the store query.

    The command aborts through ``context.abort()`` when no analysis is
    found, or when the analysis config or the QC metrics file is missing.
    A missing peddy file only logs a warning and leaves that column empty.
    """
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()

    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    # Context managers close each YAML handle as soon as it has been parsed.
    with config_path.open() as config_handle:
        config_raw = ruamel.yaml.safe_load(config_handle)
    config_data = files.parse_config(config_raw)

    with Path(config_data['sampleinfo_path']).open() as sampleinfo_handle:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_handle)
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)

    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    with qcmetrics_path.open() as qcmetrics_handle:
        qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_handle)
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # One list per table column.
    # NOTE(review): the columns are filled from three separate sample lists,
    # so rows only line up if all three share the same sample order - confirm.
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }

    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])

    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])

        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)

        predicted_sex = sexcheck_data['predicted_sex']
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")

    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")

    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)

        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")

    print(tabulate(samples, headers='keys', tablefmt='psql'))