def files_data(files_raw):
    """Get some data files"""
    # Map each output key to the parser responsible for that raw blob;
    # dict order matches the original literal, so the result is identical.
    parsers = {
        "config": mip_dna_files_api.parse_config,
        "sampleinfo": mip_dna_files_api.parse_sampleinfo,
        "qcmetrics": mip_dna_files_api.parse_qcmetrics,
        "rna_config": mip_dna_files_api.parse_config,
        "rna_sampleinfo": mip_rna_files_api.parse_sampleinfo_rna,
        "rna_config_store": store_mip.parse_config,
        "rna_sampleinfo_store": store_mip.parse_sampleinfo,
    }
    return {key: parse(files_raw[key]) for key, parse in parsers.items()}
def files_data(files_raw):
    """Parse each raw analysis file with its matching parser."""
    parsed = {}
    parsed['config'] = files_api.parse_config(files_raw['config'])
    parsed['sampleinfo'] = files_api.parse_sampleinfo(files_raw['sampleinfo'])
    parsed['qcmetrics'] = files_api.parse_qcmetrics(files_raw['qcmetrics'])
    parsed['sacct'] = sacct.parse_sacct(files_raw['sacct'])
    return parsed
def cancel(context, jobs, analysis_id):
    """Cancel all jobs in a run.

    Looks up the analysis in the store, reads its MIP config and log to
    discover the SLURM job ids, then either lists them (``jobs`` truthy)
    or cancels each one via ``scancel`` and marks the analysis canceled.
    """
    analysis_obj = context.obj['store'].analysis(analysis_id)
    # Guard clauses: bail out of the CLI if the analysis is missing or
    # not currently running.
    if analysis_obj is None:
        click.echo('analysis not found')
        context.abort()
    elif analysis_obj.status != 'running':
        click.echo(f"analysis not running: {analysis_obj.status}")
        context.abort()
    config_path = Path(analysis_obj.config_path)
    with config_path.open() as config_stream:
        config_raw = ruamel.yaml.safe_load(config_stream)
    config_data = parse_config(config_raw)
    # The MIP log file is where the submitted SLURM job ids are recorded.
    log_path = Path(f"{config_data['log_path']}")
    if not log_path.exists():
        click.echo(f"missing MIP log file: {log_path}")
        context.abort()
    with log_path.open() as log_stream:
        all_jobs = job_ids(log_stream)
    if jobs:
        # Dry listing: just print the job ids, don't touch anything.
        for job_id in all_jobs:
            click.echo(job_id)
    else:
        for job_id in all_jobs:
            LOG.debug(f"cancelling job: {job_id}")
            process = subprocess.Popen(['scancel', job_id])
            # Wait so cancellations happen sequentially.
            process.wait()
        # NOTE(review): source formatting was collapsed; assuming the status
        # update belongs to the cancel branch only (listing jobs should not
        # mark the analysis canceled) — confirm against history.
        analysis_obj.status = 'canceled'
        context.obj['store'].commit()
        click.echo('cancelled analysis successfully!')
def __call__(self, config_stream: List[str], sampleinfo: str = None, sacct: str = None):
    """Add a new analysis log.

    Args:
        config_stream: stream with the raw MIP config YAML.
        sampleinfo: optional path overriding the config's sample-info path.
        sacct: optional path overriding the default ``<log_path>.status``.

    Returns:
        the newly stored run record, or the falsy result of ``self.build``.

    Raises:
        MissingFileError: if the sample-info or sacct file does not exist.
    """
    config_raw = ruamel.yaml.safe_load(config_stream)
    config_data = files_api.parse_config(config_raw)
    # Explicit argument wins over the path recorded in the config.
    sampleinfo_path = Path(sampleinfo or config_data['sampleinfo_path'])
    if not sampleinfo_path.exists():
        raise MissingFileError(sampleinfo_path)
    with sampleinfo_path.open() as stream:
        sampleinfo_raw = ruamel.yaml.safe_load(stream)
    sampleinfo_data = files_api.parse_sampleinfo_light(sampleinfo_raw)
    # Default sacct location is derived from the MIP log path.
    sacct_path = Path(
        sacct if sacct else f"{config_data['log_path']}.status")
    if not sacct_path.exists():
        raise MissingFileError(sacct_path)
    with sacct_path.open() as stream:
        sacct_jobs = sacct_api.parse_sacct(stream)
    # Count submitted jobs from the MIP log itself.
    with Path(config_data['log_path']).open() as stream:
        jobs = len(miplog.job_ids(stream))
    run_data = self.parse(config_data, sampleinfo_data, sacct_jobs, jobs=jobs)
    # Drop any temporary logs for this case before persisting the new run.
    self._delete_temp_logs(run_data['case'])
    new_run = self.build(run_data)
    if new_run:
        self.store.add_commit(new_run)
    return new_run
def test_parse_config(files_raw) -> dict: """ Args: files_raw (dict): With dicts from files """ # GIVEN config data of a "sharp" run (not dry run) config_raw = files_raw['config'] # WHEN parsing the MIP output config config_data = files.parse_config(config_raw) # THEN it should work assert isinstance(config_data, dict) # THEN it should work and the dry run check should be "yes" assert config_data['is_dryrun'] is True # ... and should include all samples assert len(config_raw['analysis_type']) == len(config_data['samples']) # Build dict for return data config_test_data = { 'config_path': '/path_to/cases/case/analysis/case_config.yaml', 'email': '*****@*****.**', 'case': 'case', 'log_path': 'tests/fixtures/case/mip_2019-07-04T10:47:15.log', 'sampleinfo_path': '/path_to/cases/case/analysis/case_qc_sample_info.yaml', 'out_dir': '/path_to/cases/case/analysis', 'priority': 'normal', } # Check returns from def for key, value in config_test_data.items(): assert config_data[key] == value
def add_analysis(cls, config_stream):
    """Gather information from MIP analysis to store.

    Args:
        config_stream: stream with the raw MIP config YAML.

    Returns:
        a new bundle built from the config and sample-info data.

    Raises:
        AnalysisNotFinishedError: if the sample info says the analysis
            has not finished.
    """
    config_raw = ruamel.yaml.safe_load(config_stream)
    config_data = mip_files.parse_config(config_raw)
    # Use a context manager so the sample-info file handle is closed;
    # the original opened it inline and leaked the handle.
    with Path(config_data['sampleinfo_path']).open() as sampleinfo_stream:
        sampleinfo_raw = ruamel.yaml.safe_load(sampleinfo_stream)
    sampleinfo_data = mip_files.parse_sampleinfo(sampleinfo_raw)
    if sampleinfo_data['is_finished'] is False:
        raise AnalysisNotFinishedError('analysis not finished')
    new_bundle = cls._build_bundle(config_data, sampleinfo_data)
    return new_bundle
def test_parse_config(files_raw) -> dict:
    """
    Args:
        files_raw (dict): With dicts from files
    """
    # GIVEN config data of a "sharp" run (not dry run)
    config_raw = files_raw['config']

    # WHEN parsing the MIP output config
    config_data = files.parse_config(config_raw)

    # THEN a dict comes back and the dry-run flag is off
    assert isinstance(config_data, dict)
    assert config_data['is_dryrun'] is False

    # ... and every sample from the raw config is included
    assert len(config_data['samples']) == len(config_raw['analysis_type'])

    # Expected scalar fields from the fixture
    expected_fields = {
        'config_path': 'tests/fixtures/family/family_config.yaml',
        'email': '*****@*****.**',
        'family': 'family',
        'log_path': 'tests/fixtures/family/mip.pl_2017-06-17T12:11:42.log',
        'sampleinfo_path': ('/mnt/hds/proj/bioinfo/MIP_ANALYSIS/'
                            'customers/cust003/family/analysis/'
                            'family_qc_sample_info.yaml'),
        'out_dir': ('/mnt/hds/proj/bioinfo/MIP_ANALYSIS/customers/'
                    'cust003/family/analysis'),
        'priority': 'normal',
    }

    # Each expected field must match the parsed config
    for field, expected_value in expected_fields.items():
        assert config_data[field] == expected_value
def __call__(self, config_stream: List[str], sampleinfo: str = None, sacct: str = None):
    """Add a new analysis log."""
    # Parse the MIP config first; it tells us where the other files live.
    config_data = files_api.parse_config(ruamel.yaml.safe_load(config_stream))

    # Explicit argument wins over the path recorded in the config.
    sampleinfo_path = Path(sampleinfo or config_data['sampleinfo_path'])
    if not sampleinfo_path.exists():
        raise MissingFileError(sampleinfo_path)
    with sampleinfo_path.open() as stream:
        sampleinfo_data = files_api.parse_sampleinfo(ruamel.yaml.safe_load(stream))

    # Default sacct location is derived from the MIP log path.
    sacct_path = Path(sacct or f"{config_data['log_path']}.status")
    if not sacct_path.exists():
        raise MissingFileError(sacct_path)
    with sacct_path.open() as stream:
        sacct_jobs = sacct_api.parse_sacct(stream)

    # Count submitted jobs from the MIP log itself.
    with Path(config_data['log_path']).open() as stream:
        jobs = len(miplog.job_ids(stream))

    run_data = self.parse(config_data, sampleinfo_data, sacct_jobs, jobs=jobs)
    # Drop any temporary logs for this family before persisting the new run.
    self._delete_temp_logs(run_data['family'])
    new_run = self.build(run_data)
    if new_run:
        self.store.add_commit(new_run)
    return new_run
def get_sampleinfo(analysis: models.Analysis) -> str:
    """Get the sample info path for an analysis.

    Args:
        analysis: analysis record whose ``config_path`` points at the
            MIP config YAML on disk.

    Returns:
        the ``sampleinfo_path`` recorded in the parsed config.
    """
    # Use a context manager so the config file handle is closed;
    # the original opened it inline and leaked the handle.
    with Path(analysis.config_path).open() as config_stream:
        raw_data = ruamel.yaml.safe_load(config_stream)
    data = files.parse_config(raw_data)
    return data["sampleinfo_path"]
def check(context: click.Context, family: str):
    """Delete an analysis log from the database."""
    # NOTE(review): the docstring above looks copied from another command —
    # this function only prints a per-sample sex/QC comparison table and
    # deletes nothing; confirm and reword.
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()
    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    config_raw = ruamel.yaml.safe_load(config_path.open())
    config_data = files.parse_config(config_raw)
    # The config points at the sample-info file, which in turn points at
    # the QC-metrics and peddy output files.
    sampleinfo_raw = ruamel.yaml.safe_load(
        Path(config_data['sampleinfo_path']).open())
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)
    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_path.open())
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)
    # One column per data source; rows are appended per sample in the
    # order each source lists them (assumed consistent across sources —
    # TODO confirm).
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }
    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])
    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])
        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)
        predicted_sex = sexcheck_data['predicted_sex']
        # Y/X coverage ratio as a secondary sex indicator.
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")
    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        # Fraction -> percentage for display.
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")
    # Peddy output is optional; warn (don't abort) when it is missing.
    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)
        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")
    print(tabulate(samples, headers='keys', tablefmt='psql'))
def _parse_raw_mip_config_into_dict(mip_config_raw):
    """Parse a raw MIP config into a plain dict via the files API."""
    return files.parse_config(mip_config_raw)
def check(context: click.Context, family: str):
    """Delete an analysis log from the database."""
    # NOTE(review): the docstring above looks copied from another command —
    # this function only prints a per-sample sex/QC comparison table and
    # deletes nothing; confirm and reword.
    analysis_obj = context.obj['store'].analyses(family=family).first()
    if analysis_obj is None:
        LOG.error('no analysis found')
        context.abort()
    config_path = Path(analysis_obj.config_path)
    if not config_path.exists():
        LOG.error(f"analysis config not found: {config_path}")
        context.abort()
    config_raw = ruamel.yaml.safe_load(config_path.open())
    config_data = files.parse_config(config_raw)
    # The config points at the sample-info file, which in turn points at
    # the QC-metrics and peddy output files.
    sampleinfo_raw = ruamel.yaml.safe_load(Path(config_data['sampleinfo_path']).open())
    sampleinfo_data = files.parse_sampleinfo(sampleinfo_raw)
    qcmetrics_path = Path(sampleinfo_data['qcmetrics_path'])
    if not qcmetrics_path.exists():
        LOG.error(f"qc metrics not found: {str(qcmetrics_path)}")
        context.abort()
    qcmetrics_raw = ruamel.yaml.safe_load(qcmetrics_path.open())
    qcmetrics_data = files.parse_qcmetrics(qcmetrics_raw)
    # One column per data source; rows are appended per sample in the
    # order each source lists them (assumed consistent across sources —
    # TODO confirm).
    samples = {
        'sample': [],
        'type': [],
        'ped': [],
        'chanjo': [],
        'peddy': [],
        'plink': [],
        'duplicates': [],
    }
    for sample_data in config_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse analysis config")
        samples['sample'].append(sample_data['id'])
        samples['type'].append(sample_data['type'])
    for sample_data in sampleinfo_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse sample info")
        samples['ped'].append(sample_data['sex'])
        with Path(sample_data['chanjo_sexcheck']).open() as chanjo_handle:
            sexcheck_data = files.parse_chanjo_sexcheck(chanjo_handle)
        predicted_sex = sexcheck_data['predicted_sex']
        # Y/X coverage ratio as a secondary sex indicator.
        xy_ratio = sexcheck_data['y_coverage'] / sexcheck_data['x_coverage']
        samples['chanjo'].append(f"{predicted_sex} ({xy_ratio:.3f})")
    for sample_data in qcmetrics_data['samples']:
        LOG.debug(f"{sample_data['id']}: parse qc metrics")
        samples['plink'].append(sample_data['plink_sex'])
        # Fraction -> percentage for display.
        duplicates_percent = sample_data['duplicates'] * 100
        samples['duplicates'].append(f"{duplicates_percent:.3f}%")
    # Peddy output is optional; warn (don't abort) when it is missing.
    peddy_path = Path(sampleinfo_data['peddy']['sex_check'])
    if peddy_path.exists():
        with peddy_path.open() as sexcheck_handle:
            peddy_data = files.parse_peddy_sexcheck(sexcheck_handle)
        for sample_id in samples['sample']:
            LOG.debug(f"{sample_id}: parse peddy")
            predicted_sex = peddy_data[sample_id]['predicted_sex']
            het_ratio = peddy_data[sample_id]['het_ratio']
            samples['peddy'].append(f"{predicted_sex} ({het_ratio})")
    else:
        LOG.warning(f"missing peddy output: {peddy_path}")
    print(tabulate(samples, headers='keys', tablefmt='psql'))