Esempio n. 1
0
def files_data(files_raw):
    """Parse every raw MIP file entry and return the results keyed by name.

    Each key read from ``files_raw`` is passed to its matching parser and the
    parsed value is stored under the same key in the returned dict.
    """
    # (key, parser) pairs, listed in the order the entries are parsed
    parsers = (
        ("config", mip_dna_files_api.parse_config),
        ("sampleinfo", mip_dna_files_api.parse_sampleinfo),
        ("qcmetrics", mip_dna_files_api.parse_qcmetrics),
        ("rna_config", mip_dna_files_api.parse_config),
        ("rna_sampleinfo", mip_rna_files_api.parse_sampleinfo_rna),
        ("rna_config_store", store_mip.parse_config),
        ("rna_sampleinfo_store", store_mip.parse_sampleinfo),
    )
    return {name: parse(files_raw[name]) for name, parse in parsers}
Esempio n. 2
0
def files_data(files_raw):
    """Parse the raw config, sample info, QC metrics and sacct entries.

    Returns a dict with the keys 'config', 'sampleinfo', 'qcmetrics' and
    'sacct', each holding the parsed counterpart of the same key in
    ``files_raw``.
    """
    parsed_config = files_api.parse_config(files_raw['config'])
    parsed_sampleinfo = files_api.parse_sampleinfo(files_raw['sampleinfo'])
    parsed_qcmetrics = files_api.parse_qcmetrics(files_raw['qcmetrics'])
    parsed_sacct = sacct.parse_sacct(files_raw['sacct'])
    return dict(
        config=parsed_config,
        sampleinfo=parsed_sampleinfo,
        qcmetrics=parsed_qcmetrics,
        sacct=parsed_sacct,
    )
Esempio n. 3
0
def cancel(context, jobs, analysis_id):
    """Cancel the jobs of a running analysis, or only list their ids.

    The analysis is fetched from ``context.obj['store']``; the command aborts
    when it does not exist, is not in status 'running', or when the MIP log
    named in its config is missing. With a truthy ``jobs`` the job ids found
    in the log are echoed and nothing else happens. Otherwise ``scancel`` is
    run for each id, the analysis status is set to 'canceled' and the store
    is committed.
    """
    store = context.obj['store']
    analysis_obj = store.analysis(analysis_id)
    if analysis_obj is None:
        click.echo('analysis not found')
        context.abort()
    elif analysis_obj.status != 'running':
        click.echo(f"analysis not running: {analysis_obj.status}")
        context.abort()

    # Read the analysis config to find out where the MIP log lives
    with Path(analysis_obj.config_path).open() as handle:
        raw_config = ruamel.yaml.safe_load(handle)
    parsed_config = parse_config(raw_config)

    mip_log = Path(f"{parsed_config['log_path']}")
    if not mip_log.exists():
        click.echo(f"missing MIP log file: {mip_log}")
        context.abort()

    with mip_log.open() as handle:
        found_job_ids = job_ids(handle)

    if jobs:
        # Listing mode: show the ids and leave everything untouched
        for found_id in found_job_ids:
            click.echo(found_id)
        return

    for found_id in found_job_ids:
        LOG.debug(f"cancelling job: {found_id}")
        subprocess.Popen(['scancel', found_id]).wait()

    analysis_obj.status = 'canceled'
    store.commit()
    click.echo('cancelled analysis successfully!')
Esempio n. 4
0
 def __call__(self, config_stream: List[str], sampleinfo: str = None,
              sacct: str = None):
     """Add a new analysis log.

     Parses the MIP config from ``config_stream``, then the sample info and
     sacct files (explicit paths win over the ones derived from the config),
     counts the job ids in the MIP log and builds a run from all of it. The
     run is committed to the store and returned when it is truthy.

     Raises:
         MissingFileError: if the sample info or sacct file does not exist.
     """
     raw_config = ruamel.yaml.safe_load(config_stream)
     config_data = files_api.parse_config(raw_config)

     # Sample info: explicit path first, config value as fallback
     sampleinfo_file = Path(sampleinfo or config_data['sampleinfo_path'])
     if not sampleinfo_file.exists():
         raise MissingFileError(sampleinfo_file)
     with sampleinfo_file.open() as handle:
         raw_sampleinfo = ruamel.yaml.safe_load(handle)
     sampleinfo_data = files_api.parse_sampleinfo_light(raw_sampleinfo)

     # Sacct status: explicit path first, "<log_path>.status" as fallback
     status_file = Path(sacct or f"{config_data['log_path']}.status")
     if not status_file.exists():
         raise MissingFileError(status_file)
     with status_file.open() as handle:
         sacct_jobs = sacct_api.parse_sacct(handle)

     with Path(config_data['log_path']).open() as handle:
         job_count = len(miplog.job_ids(handle))

     run_data = self.parse(config_data, sampleinfo_data, sacct_jobs,
                           jobs=job_count)
     self._delete_temp_logs(run_data['case'])

     new_run = self.build(run_data)
     if not new_run:
         return None
     self.store.add_commit(new_run)
     return new_run
Esempio n. 5
0
def files_data(files_raw):
    """Return the parsed versions of the raw file entries.

    The 'config', 'sampleinfo', 'qcmetrics' and 'sacct' entries of
    ``files_raw`` are each handed to their parser; the results are returned
    in a dict under the same keys.
    """
    parsed = {}
    parsed['config'] = files_api.parse_config(files_raw['config'])
    parsed['sampleinfo'] = files_api.parse_sampleinfo(files_raw['sampleinfo'])
    parsed['qcmetrics'] = files_api.parse_qcmetrics(files_raw['qcmetrics'])
    parsed['sacct'] = sacct.parse_sacct(files_raw['sacct'])
    return parsed
Esempio n. 6
0
def test_parse_config(files_raw) -> None:
    """Parse the raw MIP config and compare it with the expected values.

    Args:
        files_raw (dict): With dicts from files
    """

    # GIVEN the raw MIP config; the assertion below expects it to be a dry run
    config_raw = files_raw['config']

    # WHEN parsing the MIP output config
    config_data = files.parse_config(config_raw)

    # THEN it should work
    assert isinstance(config_data, dict)

    # THEN the dry run flag should be set
    assert config_data['is_dryrun'] is True

    # ... and should include all samples
    assert len(config_raw['analysis_type']) == len(config_data['samples'])

    # Expected values for a selection of the parsed keys
    config_test_data = {
        'config_path': '/path_to/cases/case/analysis/case_config.yaml',
        'email': '*****@*****.**',
        'case': 'case',
        'log_path': 'tests/fixtures/case/mip_2019-07-04T10:47:15.log',
        'sampleinfo_path': '/path_to/cases/case/analysis/case_qc_sample_info.yaml',
        'out_dir': '/path_to/cases/case/analysis',
        'priority': 'normal',
    }

    # Every expected key must be present with exactly the expected value
    for key, value in config_test_data.items():
        assert config_data[key] == value
Esempio n. 7
0
File: add.py Progetto: mayabrandi/cg
 def add_analysis(cls, config_stream):
     """Gather information from MIP analysis to store.

     Args:
         config_stream: YAML source of the MIP config, passed straight to
             ``ruamel.yaml.safe_load``.

     Returns:
         The result of ``cls._build_bundle`` for the parsed config and
         sample info.

     Raises:
         AnalysisNotFinishedError: if the parsed sample info has
             ``'is_finished'`` set to ``False``.
     """
     config_raw = ruamel.yaml.safe_load(config_stream)
     config_data = mip_files.parse_config(config_raw)

     # Use a context manager so the sample info handle is closed after loading
     with Path(config_data['sampleinfo_path']).open() as sampleinfo_stream:
         sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_stream)
     sampleinfo_data = mip_files.parse_sampleinfo(sampleinfo_raw)

     if sampleinfo_data['is_finished'] is False:
         raise AnalysisNotFinishedError('analysis not finished')
     return cls._build_bundle(config_data, sampleinfo_data)
Esempio n. 8
0
def test_parse_config(files_raw) -> None:
    """Parse the raw MIP config and compare it with the expected values.

    Args:
        files_raw (dict): With dicts from files
    """

    # GIVEN config data of a "sharp" run (not dry run)
    config_raw = files_raw['config']

    # WHEN parsing the MIP output config
    config_data = files.parse_config(config_raw)

    # THEN it should work
    assert isinstance(config_data, dict)

    # THEN it should work and the dry run check should be "no"
    assert config_data['is_dryrun'] is False

    # ... and should include all samples
    assert len(config_raw['analysis_type']) == len(config_data['samples'])

    # Expected values for a selection of the parsed keys; long paths are
    # split with implicit literal concatenation, the values are unchanged
    config_test_data = {
        'config_path': 'tests/fixtures/family/family_config.yaml',
        'email': '*****@*****.**',
        'family': 'family',
        'log_path': 'tests/fixtures/family/mip.pl_2017-06-17T12:11:42.log',
        'sampleinfo_path': (
            '/mnt/hds/proj/bioinfo/MIP_ANALYSIS/'
            'customers/cust003/family/analysis/'
            'family_qc_sample_info.yaml'
        ),
        'out_dir': (
            '/mnt/hds/proj/bioinfo/MIP_ANALYSIS/customers/'
            'cust003/family/analysis'
        ),
        'priority': 'normal',
    }

    # Every expected key must be present with exactly the expected value
    for key, value in config_test_data.items():
        assert config_data[key] == value
def test_parse_config(files_raw) -> None:
    """Parse the raw MIP config and compare it with the expected values.

    Args:
        files_raw (dict): With dicts from files
    """

    # GIVEN config data of a "sharp" run (not dry run)
    config_raw = files_raw['config']

    # WHEN parsing the MIP output config
    config_data = files.parse_config(config_raw)

    # THEN it should work
    assert isinstance(config_data, dict)

    # THEN it should work and the dry run check should be "no"
    assert config_data['is_dryrun'] is False

    # ... and should include all samples
    assert len(config_raw['analysis_type']) == len(config_data['samples'])

    # Expected values for a selection of the parsed keys
    sampleinfo_path = (
        '/mnt/hds/proj/bioinfo/MIP_ANALYSIS/'
        'customers/cust003/family/analysis/'
        'family_qc_sample_info.yaml'
    )
    out_dir = (
        '/mnt/hds/proj/bioinfo/MIP_ANALYSIS/customers/'
        'cust003/family/analysis'
    )
    config_test_data = {
        'config_path': 'tests/fixtures/family/family_config.yaml',
        'email': '*****@*****.**',
        'family': 'family',
        'log_path': 'tests/fixtures/family/mip.pl_2017-06-17T12:11:42.log',
        'sampleinfo_path': sampleinfo_path,
        'out_dir': out_dir,
        'priority': 'normal',
    }

    # Every expected key must be present with exactly the expected value
    for key, value in config_test_data.items():
        assert config_data[key] == value
Esempio n. 10
0
 def __call__(self, config_stream: List[str], sampleinfo: str=None, sacct: str=None):
     """Add a new analysis log.

     The MIP config is parsed from ``config_stream``. The sample info and
     sacct files are then read, taking the explicit ``sampleinfo`` / ``sacct``
     paths when given and config-derived paths otherwise. The number of job
     ids in the MIP log is counted, a run is built from the collected data
     and, when truthy, committed to the store and returned.

     Raises:
         MissingFileError: if the sample info or sacct file does not exist.
     """
     raw_config = ruamel.yaml.safe_load(config_stream)
     config_data = files_api.parse_config(raw_config)

     # Sample info: explicit path first, config value as fallback
     sampleinfo_file = Path(sampleinfo or config_data['sampleinfo_path'])
     if not sampleinfo_file.exists():
         raise MissingFileError(sampleinfo_file)
     with sampleinfo_file.open() as handle:
         raw_sampleinfo = ruamel.yaml.safe_load(handle)
     sampleinfo_data = files_api.parse_sampleinfo(raw_sampleinfo)

     # Sacct status: explicit path first, "<log_path>.status" as fallback
     status_file = Path(sacct or f"{config_data['log_path']}.status")
     if not status_file.exists():
         raise MissingFileError(status_file)
     with status_file.open() as handle:
         sacct_jobs = sacct_api.parse_sacct(handle)

     with Path(config_data['log_path']).open() as handle:
         job_count = len(miplog.job_ids(handle))

     run_data = self.parse(config_data, sampleinfo_data, sacct_jobs, jobs=job_count)
     self._delete_temp_logs(run_data['family'])

     new_run = self.build(run_data)
     if not new_run:
         return None
     self.store.add_commit(new_run)
     return new_run
Esempio n. 11
0
 def get_sampleinfo(analysis: models.Analysis) -> str:
     """Get the sample info path for an analysis.

     Args:
         analysis: analysis record whose ``config_path`` points at the MIP
             config YAML file.

     Returns:
         The ``"sampleinfo_path"`` value of the parsed config.
     """
     # Use a context manager so the config handle is closed after loading
     with Path(analysis.config_path).open() as config_stream:
         raw_data = ruamel.yaml.safe_load(config_stream)
     data = files.parse_config(raw_data)
     return data["sampleinfo_path"]
Esempio n. 12
0
def check(context: click.Context, family: str):
    """Print a per-sample table of sex checks and duplicate rates.

    Loads the config, sample info and QC metrics of the first analysis the
    store returns for ``family`` and prints a table with, per sample: id,
    analysis type, sex from the sample info ('ped'), chanjo predicted sex
    with the Y/X coverage ratio, peddy predicted sex with the het ratio,
    plink sex and the duplicates percentage.

    The command aborts through ``context.abort()`` when no analysis is found
    or when the analysis config or QC metrics file is missing. A missing
    peddy sex check file only logs a warning and leaves that column empty.

    Args:
        context: click context; ``context.obj['store']`` is queried for the
            analysis.
        family: family id used to look up the analysis.
    """
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()

    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    # Files are read inside ``with`` blocks so the handles are always closed
    with config_path.open() as config_handle:
        config_raw = ruamel.yaml.safe_load(config_handle)
    config_data = files.parse_config(config_raw)

    with Path(config_data['sampleinfo_path']).open() as sampleinfo_handle:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_handle)
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)

    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    with qcmetrics_path.open() as qcmetrics_handle:
        qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_handle)
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # One list per output column; tabulate uses the keys as headers
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }
    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])

    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])

        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)

        predicted_sex = sexcheck_data['predicted_sex']
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")

    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")

    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)

        # Peddy results are looked up by the sample ids from the config
        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")

    print(tabulate(samples, headers='keys', tablefmt='psql'))
Esempio n. 13
0
def _parse_raw_mip_config_into_dict(mip_config_raw):
    """Return the raw MIP config as parsed by ``files.parse_config``."""
    return files.parse_config(mip_config_raw)
Esempio n. 14
0
def check(context: click.Context, family: str):
    """Print a per-sample table of sex checks and duplicate rates.

    Loads the config, sample info and QC metrics of the first analysis the
    store returns for ``family`` and prints a table with, per sample: id,
    analysis type, sex from the sample info ('ped'), chanjo predicted sex
    with the Y/X coverage ratio, peddy predicted sex with the het ratio,
    plink sex and the duplicates percentage.

    The command aborts through ``context.abort()`` when no analysis is found
    or when the analysis config or QC metrics file is missing. A missing
    peddy sex check file only logs a warning and leaves that column empty.

    Args:
        context: click context; ``context.obj['store']`` is queried for the
            analysis.
        family: family id used to look up the analysis.
    """
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()

    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    # Files are read inside ``with`` blocks so the handles are always closed
    with config_path.open() as config_handle:
        config_raw = ruamel.yaml.safe_load(config_handle)
    config_data = files.parse_config(config_raw)

    with Path(config_data['sampleinfo_path']).open() as sampleinfo_handle:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_handle)
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)

    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    with qcmetrics_path.open() as qcmetrics_handle:
        qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_handle)
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)

    # One list per output column; tabulate uses the keys as headers
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }
    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])

    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])

        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)

        predicted_sex = sexcheck_data['predicted_sex']
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")

    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")

    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)

        # Peddy results are looked up by the sample ids from the config
        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")

    print(tabulate(samples, headers='keys', tablefmt='psql'))
Esempio n. 15
0
def _parse_raw_mip_config_into_dict(mip_config_raw):
    """Hand the raw MIP config to ``files.parse_config`` and return its result."""
    return files.parse_config(mip_config_raw)