Esempio n. 1
0
def files_data(files_raw):
    """Parse each raw file payload and return the results keyed by file kind.

    Args:
        files_raw (dict): Raw contents; the keys 'config', 'sampleinfo',
            'qcmetrics' and 'sacct' are read.

    Returns:
        dict: Parsed data stored under the same four keys.
    """
    # Parsed in the same order as the keys appear in the returned dict.
    config = files_api.parse_config(files_raw['config'])
    sampleinfo = files_api.parse_sampleinfo(files_raw['sampleinfo'])
    qcmetrics = files_api.parse_qcmetrics(files_raw['qcmetrics'])
    # Named ``sacct_data`` so the local does not shadow the ``sacct`` module.
    sacct_data = sacct.parse_sacct(files_raw['sacct'])
    return dict(
        config=config,
        sampleinfo=sampleinfo,
        qcmetrics=qcmetrics,
        sacct=sacct_data,
    )
Esempio n. 2
0
def files_data(files_raw):
    """Run the matching parser over every raw file entry.

    Args:
        files_raw (dict): Raw contents; the keys 'config', 'sampleinfo',
            'qcmetrics' and 'sacct' are read.

    Returns:
        dict: Parser output for each entry, stored under the same key.
    """
    parsed = {}
    parsed['config'] = files_api.parse_config(files_raw['config'])
    parsed['sampleinfo'] = files_api.parse_sampleinfo(files_raw['sampleinfo'])
    parsed['qcmetrics'] = files_api.parse_qcmetrics(files_raw['qcmetrics'])
    parsed['sacct'] = sacct.parse_sacct(files_raw['sacct'])
    return parsed
Esempio n. 3
0
def test_parse_qcmetrics(files_raw):
    """Check the parsed qc metrics of one known sample.

    Args:
        files_raw (dict): With dicts from files
    """

    # GIVEN qc metrics input from an analysis
    qcmetrics_raw = files_raw['qcmetrics']

    # WHEN parsing it
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # THEN it should work ;-)
    assert isinstance(qcmetrics_data, dict)

    # Sample data
    # Build dict for sample return data
    qcmetrics_test_sample_data = {
        'at_dropout': '1.716704',
        'duplicates': 0.0379523291229131,
        'id': 'mother',
        'insert_size_standard_deviation': 94.353778,
        'gc_dropout': '0.214813',
        'mapped': 0.9974176575073073,
        'median_insert_size': '409',
        'plink_sex': 'female',
        'predicted_sex': 'female',
        'reads': 600006004,
        'strand_balance': 0.50162,
        'target_coverage': 28.643247,
    }

    # Select the sample(s) under test once instead of re-filtering per key
    expected_id = qcmetrics_test_sample_data['id']
    matching_samples = [
        sample_data for sample_data in qcmetrics_data['samples']
        if sample_data['id'] == expected_id
    ]

    # Without this guard every assertion below is skipped (and the test
    # passes) when the expected sample is absent from the parsed data
    assert matching_samples, f"no sample with id {expected_id!r} in qc metrics"

    # Check returns from def
    for sample_data in matching_samples:
        for key, value in qcmetrics_test_sample_data.items():
            assert sample_data[key] == value

    # Sample coverage data
    # Build dict for sample coverage return data
    qcmetrics_test_sample_cov_data = {
        10: '0.98974',
        20: '0.935455',
        50: '0.002685',
        100: '0.000101',
    }

    # Check returns from def
    for sample_data in matching_samples:
        for key, value in qcmetrics_test_sample_cov_data.items():
            assert sample_data['completeness_target'][key] == value
Esempio n. 4
0
def files_data(files_raw):
    """Parse the raw DNA, RNA and store file payloads and return them by name.

    Args:
        files_raw (dict): Raw contents; read under the same keys that are
            used in the returned dict.

    Returns:
        dict: Parsed data for each of the seven entries.
    """
    parsed = {}

    # DNA analysis files
    parsed["config"] = mip_dna_files_api.parse_config(files_raw["config"])
    parsed["sampleinfo"] = mip_dna_files_api.parse_sampleinfo(files_raw["sampleinfo"])
    parsed["qcmetrics"] = mip_dna_files_api.parse_qcmetrics(files_raw["qcmetrics"])

    # RNA analysis files
    # NOTE(review): rna_config goes through the DNA config parser - confirm this is intended
    parsed["rna_config"] = mip_dna_files_api.parse_config(files_raw["rna_config"])
    parsed["rna_sampleinfo"] = mip_rna_files_api.parse_sampleinfo_rna(
        files_raw["rna_sampleinfo"]
    )

    # Files parsed with the store_mip parsers
    parsed["rna_config_store"] = store_mip.parse_config(files_raw["rna_config_store"])
    parsed["rna_sampleinfo_store"] = store_mip.parse_sampleinfo(
        files_raw["rna_sampleinfo_store"]
    )
    return parsed
Esempio n. 5
0
 def parse_qcmetrics(data: dict) -> dict:
     """Parse qc metrics by delegating to the internal Trailblazer MIP API.

     Args:
         data: Raw qc metrics, passed unchanged to ``files.parse_qcmetrics``.

     Returns:
         Whatever ``files.parse_qcmetrics`` returns for ``data``.
     """
     parsed_metrics = files.parse_qcmetrics(data)
     return parsed_metrics
Esempio n. 6
0
def test_parse_qcmetrics(files_raw):
    """Check versions, sample metrics and coverage in the parsed qc metrics.

    Args:
        files_raw (dict): With dicts from files
    """

    # GIVEN qc metrics input from an analysis
    qcmetrics_raw = files_raw['qcmetrics']

    # WHEN parsing it
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # THEN it should work ;-)
    assert isinstance(qcmetrics_data, dict)

    ## Version data
    # Build dict for version return data
    qcmetrics_test_version_data = {
        'freebayes': 'v1.0.2',
        'gatk': 3.6,
        'manta': '1.0.3',
        'bcftools': '1.3.1+htslib-1.3.1',
        'vep': 'v87',
    }

    # Check returns from def
    for key, value in qcmetrics_test_version_data.items():
        assert qcmetrics_data['versions'][key] == value

    ## Sample data
    # Build dict for sample return data
    qcmetrics_test_sample_data = {
        'at_dropout': 0.126963,
        'duplicates': 0.132719986683768,
        'id': 'sample',
        'insert_size_standard_deviation': 89.871783,
        'gc_dropout': 3.956909,
        'mapped': 0.9850732625744484,
        'median_insert_size': 413,
        'plink_sex': 'female',
        'predicted_sex': 'female',
        'reads': 949878168,
        'strand_balance': 0.499558,
        'target_coverage': 37.428611,
    }

    ## Sample coverage data
    # Build dict for sample coverage return data
    qcmetrics_test_sample_cov_data = {
        10: 0.991187,
        20: 0.984713,
        50: 0.063229,
        100: 0.000372,
    }

    parsed_samples = qcmetrics_data['samples']

    # Without this guard the loops below never run (and the test passes)
    # when the parser returns no samples at all
    assert parsed_samples, "no samples in parsed qc metrics"

    # Check returns from def: every parsed sample must match the expected data
    for sample_data in parsed_samples:
        for key, value in qcmetrics_test_sample_data.items():
            assert sample_data[key] == value
        for key, value in qcmetrics_test_sample_cov_data.items():
            assert sample_data['completeness_target'][key] == value
def test_parse_qcmetrics(files_raw):
    """Check versions, sample metrics and coverage in the parsed qc metrics.

    Args:
        files_raw (dict): With dicts from files
    """

    # GIVEN qc metrics input from an analysis
    qcmetrics_raw = files_raw['qcmetrics']

    # WHEN parsing it
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # THEN it should work ;-)
    assert isinstance(qcmetrics_data, dict)

    ## Version data
    # Build dict for version return data
    qcmetrics_test_version_data = {
        'freebayes': 'v1.0.2',
        'gatk': 3.6,
        'manta': '1.0.3',
        'bcftools': '1.3.1+htslib-1.3.1',
        'vep': 'v87',
    }

    # Check returns from def
    for key, value in qcmetrics_test_version_data.items():
        assert qcmetrics_data['versions'][key] == value

    ## Sample data
    # Build dict for sample return data
    qcmetrics_test_sample_data = {
        'at_dropout': 0.126963,
        'duplicates': 0.132719986683768,
        'id': 'sample',
        'insert_size_standard_deviation': 89.871783,
        'gc_dropout': 3.956909,
        'mapped': 0.9850732625744484,
        'median_insert_size': 413,
        'plink_sex': 'female',
        'predicted_sex': 'female',
        'reads': 949878168,
        'strand_balance': 0.499558,
        'target_coverage': 37.428611,
    }

    ## Sample coverage data
    # Build dict for sample coverage return data
    qcmetrics_test_sample_cov_data = {
        10: 0.991187,
        20: 0.984713,
        50: 0.063229,
        100: 0.000372,
    }

    parsed_samples = qcmetrics_data['samples']

    # Without this guard the loops below never run (and the test passes)
    # when the parser returns no samples at all
    assert parsed_samples, "no samples in parsed qc metrics"

    # Check returns from def: every parsed sample must match the expected data
    for sample_data in parsed_samples:
        for key, value in qcmetrics_test_sample_data.items():
            assert sample_data[key] == value
        for key, value in qcmetrics_test_sample_cov_data.items():
            assert sample_data['completeness_target'][key] == value
Esempio n. 8
0
def check(context: click.Context, family: str):
    """Print a per-sample table of sex checks and duplicate rates for a family.

    Looks up the first analysis for ``family``, loads its analysis config,
    sample info and qc metrics files, and prints one table with the sample
    id and type, the pedigree sex, and the chanjo, peddy and plink sex
    predictions alongside the duplicates percentage.

    Args:
        context: Click context; ``context.obj['store']`` is queried for the
            analysis.
        family: Family identifier passed to the store's ``analyses`` query.
    """
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()

    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    # Open the YAML files with context managers so the handles are closed
    # as soon as their content has been loaded.
    with config_path.open() as config_handle:
        config_raw = ruamel.yaml.safe_load(config_handle)
    config_data = files.parse_config(config_raw)

    with Path(config_data['sampleinfo_path']).open() as sampleinfo_handle:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_handle)
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)

    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    with qcmetrics_path.open() as qcmetrics_handle:
        qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_handle)
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # One list per table column; each loop below appends one row value per sample.
    # NOTE(review): the columns are filled from three different files and line
    # up only if all of them list the samples in the same order - confirm.
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }
    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])

    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])

        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)

        predicted_sex = sexcheck_data['predicted_sex']
        # NOTE(review): this division fails if x_coverage is 0 - confirm the
        # parser never yields that.
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")

    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")

    # Peddy output is optional: when the file is missing the 'peddy' column
    # is left empty and only a warning is logged.
    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)

        # Look up peddy results by sample id, in the analysis config order
        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")

    print(tabulate(samples, headers='keys', tablefmt='psql'))
Esempio n. 9
0
def _parse_qc_metric_file_into_dict(qcmetrics_raw):
    """Return the result of ``files.parse_qcmetrics`` for the raw qc metrics."""
    return files.parse_qcmetrics(qcmetrics_raw)
Esempio n. 10
0
def check(context: click.Context, family: str):
    """Print a per-sample table of sex checks and duplicate rates for a family.

    Looks up the first analysis for ``family``, loads its analysis config,
    sample info and qc metrics files, and prints one table with the sample
    id and type, the pedigree sex, and the chanjo, peddy and plink sex
    predictions alongside the duplicates percentage.

    Args:
        context: Click context; ``context.obj['store']`` is queried for the
            analysis.
        family: Family identifier passed to the store's ``analyses`` query.
    """
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()

    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    # Open the YAML files with context managers so the handles are closed
    # as soon as their content has been loaded.
    with config_path.open() as config_handle:
        config_raw = ruamel.yaml.safe_load(config_handle)
    config_data = files.parse_config(config_raw)

    with Path(config_data['sampleinfo_path']).open() as sampleinfo_handle:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_handle)
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)

    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    with qcmetrics_path.open() as qcmetrics_handle:
        qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_handle)
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # One list per table column; each loop below appends one row value per sample.
    # NOTE(review): the columns are filled from three different files and line
    # up only if all of them list the samples in the same order - confirm.
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }
    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])

    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])

        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)

        predicted_sex = sexcheck_data['predicted_sex']
        # NOTE(review): this division fails if x_coverage is 0 - confirm the
        # parser never yields that.
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")

    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")

    # Peddy output is optional: when the file is missing the 'peddy' column
    # is left empty and only a warning is logged.
    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)

        # Look up peddy results by sample id, in the analysis config order
        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")

    print(tabulate(samples, headers='keys', tablefmt='psql'))
Esempio n. 11
0
def _parse_qc_metric_file_into_dict(qcmetrics_raw):
    """Hand the raw qc metrics to ``files.parse_qcmetrics`` and return its output."""
    return files.parse_qcmetrics(qcmetrics_raw)