def get_sample_ready(sample_id):
    """Check if sample has sufficient reads."""
    sample_res = glue.get('lims', 'samples', sample_id)
    target_reads = sample_res.json['target_reads']
    ready_res = glue.get('moneypenny', 'samples', 'ready', sample_id,
                         str(target_reads))
    return ready_res.json
def post_coverage(cust_id, case_id):
    """Load coverage for all samples in a case."""
    analysis_res = glue.get("analyze", "analyses", cust_id, case_id)
    samples = []
    for sample_id, bed_file in analysis_res.json["chanjo_output"].items():
        sample_res = glue.get("lims", "samples", sample_id)
        # use the customer sample id when loading into chanjo
        alt_id = sample_res.json["name"]
        load_payload = {"bed_file": bed_file, "sample_id": alt_id,
                        "group_id": case_id}
        glue.post("chanjo", "samples", data=load_payload)
        samples.append(sample_res.json)
    return samples
def get_sample_status(sample_id=None, project_id=None, sample_ids=None):
    """Check the status of a list of samples or a project."""
    if sample_id:
        sample_ids = [sample_id]
    if project_id:
        proj_res = glue.get('lims', 'samples', params={'project_id': project_id})
        proj_samples = [sample['id'] for sample in proj_res.json['samples']]
        # guard against ``sample_ids`` being None when only a project id is given
        sample_ids = itertools.chain(sample_ids or [], proj_samples)
    samples_data = []
    for sample_id in sample_ids:
        status_res = glue.get('moneypenny', 'samples', 'status', sample_id)
        samples_data.append(status_res.json)
    return samples_data
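# A minimal usage sketch for get_sample_status(), assuming callers either know
# the sample ids up front or only have a project id. The function name and the
# placeholder ids are illustrative and not part of the module's API.
def _collect_statuses_example():
    """Illustrative only: exercise both ways of calling get_sample_status()."""
    by_samples = get_sample_status(sample_ids=['sample-1', 'sample-2'])
    by_project = get_sample_status(project_id='project-1')
    return by_samples + by_project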
def get_case_ready(cust_id, case_id):
    """Check if all samples in a case are ready."""
    case_res = glue.get('moneypenny', 'cases', cust_id, case_id)
    sample_ids = set(sample['lims_id'] for sample in case_res.json['samples'])
    samples_res = glue.get('lims', 'samples', params={'cust_id': cust_id,
                                                      'case_id': case_id})
    lims_ids = set(sample['id'] for sample in samples_res.json['samples'])
    if sample_ids != lims_ids:
        logger.warning("DB: {}, LIMS: {}".format('|'.join(sample_ids),
                                                 '|'.join(lims_ids)))
        raise AttributeError('database vs LIMS samples are out of sync')
    all_samples = []
    for sample_id in sample_ids:
        logger.info('checking status of %s', sample_id)
        all_samples.append(get_sample_ready(sample_id))
    return all(all_samples)
def post_pedigree(cust_id, case_id):
    """Write pedigree to correct location."""
    logger.debug('gather information from LIMS')
    samples_res = glue.get('lims', 'samples', params={'cust_id': cust_id,
                                                      'case_id': case_id})
    analysis_type = utils.get_analysis_type(samples_res.json['analysis_types'])
    ped_content = get_pedigree(cust_id, case_id)
    docase_res = glue.post('assemble', 'cases', 'prepare', cust_id, case_id,
                           data={'analysis_type': analysis_type,
                                 'ped_content': ped_content})
    return docase_res.json
def post_upload(upload_root, cust_id, case_id, update=False):
    """Process analysis and upload data to Scout."""
    logger.debug('build out folder structure')
    out_base = path(upload_root)
    upload_dir = out_base.joinpath(cust_id, case_id)
    upload_dir.makedirs_p()
    outped_file = upload_dir.joinpath('pedigree.madeline.tsv')
    logger.debug('retrieve analysis data')
    analysis_res = glue.get('analyze', 'analyses', cust_id, case_id)
    if analysis_res.json['is_complete']:
        case_res = glue.get('scout', 'cases', cust_id, case_id)
        if (not case_res.json) or update:
            logger.debug('prepare for Scout upload')
            post_coverage(cust_id, case_id)
            coverage_report = post_report(upload_root, cust_id, case_id)
            logger.debug('run madeline')
            mad_res = glue.post('analyze', 'madeline', 'run', cust_id, case_id,
                                data={'out_ped': outped_file})
            logger.debug('write scout config file')
            ped_svg = mad_res.json['output']
            scout_config = post_config(upload_root, cust_id, case_id,
                                       coverage_report, ped_svg)
            logger.debug('load data into scout')
            glue.post('scout', 'cases', data={'scout_config': scout_config})
            logger.debug('load the updated case from scout')
            newcase_res = glue.get('scout', 'cases', cust_id, case_id)
            add_payload = {'cust_id': cust_id, 'case_id': case_id,
                           'uploaded_at': newcase_res.json['created_at']}
            add_res = glue.post('moneypenny', 'uploads', data=add_payload)
            return add_res.json
        else:
            raise ScoutUploadConflictError('case already uploaded')
    else:
        raise AnalysisNotCompleteError('case not yet analyzed')
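# A minimal usage sketch for post_upload(), assuming it is driven from a batch
# job that should skip already-uploaded or unfinished cases instead of crashing.
# The function name and log messages below are illustrative, not part of the
# module's API.
def _upload_or_skip(upload_root, cust_id, case_id):
    """Attempt a Scout upload and log (rather than raise) known conditions."""
    try:
        return post_upload(upload_root, cust_id, case_id)
    except ScoutUploadConflictError:
        logger.info('skipping %s/%s: already uploaded', cust_id, case_id)
    except AnalysisNotCompleteError:
        logger.info('skipping %s/%s: analysis not finished', cust_id, case_id)
    return None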
def post_concordance(sample_id):
    """Match genotypes across samples to confirm concordance."""
    sample_res = glue.get('lims', 'samples', sample_id)
    cust_id = sample_res.json['customer']
    case_id = sample_res.json['family_id']
    analysis_res = glue.get('analyze', 'analyses', cust_id, case_id)
    started_at = analysis_res.json['analyzed_at']
    logger.debug('match sample genotypes')
    match_res = glue.get('genotype', 'samples', 'match', sample_id)
    top_hit = match_res.json['results'][0]
    logger.debug('persist the comparison result')
    add_payload = {'sample_id': sample_id, 'started_at': started_at,
                   'matches': top_hit['match'],
                   'mismatches': top_hit.get('mismatch', 0),
                   'unknowns': top_hit.get('unknown', 0),
                   'is_failed': match_res.json.get('is_failed')}
    add_res = glue.post('moneypenny', 'genotypings', data=add_payload)
    return add_res.json
def post_genotypes(cust_id, case_id):
    """Load genotypes for the most recent analysis of a case."""
    analysis_res = glue.get('analyze', 'analyses', cust_id, case_id)
    if analysis_res.json['is_complete']:
        logger.debug('loading genotypes...')
        payload = {'type': 'vcf', 'file': analysis_res.json['ready_vcf']}
        addgt_res = glue.post('genotype', 'samples', data=payload)
    else:
        logger.error('analysis not complete')
        raise AnalysisNotCompleteError
    return addgt_res.json
def post_spof(flowcell_id, sample_id, reads):
    """Add spof and flowcell to the status database."""
    logger.debug('fetch related FASTQ files')
    fq_res = glue.get('demux', 'spofs', flowcell_id, sample_id)
    logger.debug('add spof to database')
    data = {'read_count': reads, 'fastq_files': fq_res.json['fastq_files'],
            'sample_id': sample_id, 'flowcell_id': flowcell_id}
    add_res = glue.post('moneypenny', 'spofs', data=data)
    return add_res.json
def post_config(upload_root, cust_id, case_id, coverage_report, ped_svg):
    """Write the Scout config file for the latest analysis of a case."""
    config_res = glue.get('analyze', 'analyses', 'scout', cust_id, case_id)
    config_res.json['coverage_report'] = os.path.abspath(coverage_report)
    config_res.json['madeline'] = os.path.abspath(ped_svg)
    logger.debug('figure out where to write config')
    out_base = path(upload_root)
    upload_dir = out_base.joinpath(cust_id, case_id)
    upload_dir.makedirs_p()
    config_file = upload_dir.joinpath('scout-config-clinical.ini')
    write_ini(config_file, config_res.json)
    return config_file
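# ``write_ini`` is used above but not defined in this module. A minimal sketch
# of what it might look like, assuming the Scout config payload is a flat
# mapping and stdlib ``configparser`` formatting is acceptable; the real helper
# may live elsewhere and behave differently.
def _write_ini_sketch(config_file, config_data, section='scout'):
    """Illustrative stand-in: dump a flat dict as a single-section INI file."""
    import configparser  # stdlib; local import keeps the sketch self-contained
    parser = configparser.ConfigParser()
    # ConfigParser only accepts string values, so everything is coerced here
    parser[section] = {key: str(value) for key, value in config_data.items()}
    with open(config_file, 'w') as handle:
        parser.write(handle)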
def post_start_analysis(cust_id, case_id, gene_list=None, mip_config=None,
                        add_only=False):
    """Start analysis for a case."""
    filters = {'cust_id': cust_id, 'case_id': case_id}
    samples_res = glue.get('lims', 'samples', params=filters)
    analysis_types = samples_res.json['analysis_types']
    analysis_type = get_analysis_type(analysis_types)
    if add_only:
        case_res = glue.get('analyze', 'analyses', cust_id, case_id)
    else:
        start_payload = {'seq_type': analysis_type, 'gene_list': gene_list,
                         'mip_config': mip_config}
        case_res = glue.post('analyze', 'analyses', 'start', cust_id, case_id,
                             data=start_payload)
    logger.debug('persist the analysis to the database')
    analyzed_at = case_res.json['analyzed_at']
    add_payload = {'cust_id': cust_id, 'case_id': case_id,
                   'analysis_type': analysis_type, 'analyzed_at': analyzed_at}
    add_res = glue.post('moneypenny', 'analyses', data=add_payload)
    return add_res.json
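# ``get_analysis_type`` (also referenced as ``utils.get_analysis_type`` above)
# is not defined in this module. A plausible sketch, assuming a case-level
# analysis type is only meaningful when every sample agrees; the real helper
# may apply different rules for mixed cases.
def _get_analysis_type_sketch(analysis_types):
    """Illustrative stand-in: return the single analysis type shared by all samples."""
    unique_types = set(analysis_types)
    if len(unique_types) != 1:
        raise ValueError('mixed analysis types: {}'
                         .format(', '.join(sorted(unique_types))))
    return unique_types.pop()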
def post_symlink_spof(demux_root, flowcell_id, sample_id, reason='analysis'):
    """Assemble/symlink files for a sample flowcell directory.

    Args:
        demux_root (str): root path to demux folder
        flowcell_id (str): flowcell id
        sample_id (str): globally unique sample id
        reason (str): why the files are linked (defaults to 'analysis')
    """
    logger.debug('gather information from LIMS')
    sample_res = glue.get('lims', 'samples', sample_id)
    analysis_type = sample_res.json['analysis_type']
    case_id = sample_res.json['family_id']
    cust_id = sample_res.json['customer']
    logger.debug('gather information from status db')
    spof_res = glue.get('moneypenny', 'spofs', flowcell_id, sample_id)
    demux_folder = spof_res.json['flowcell']['demux_folder']
    demuxed_at = dateify(spof_res.json['flowcell']['demuxed_at']).date()
    spof_dir = build_sampledir(demux_root, demux_folder, sample_id)
    fastq_paths = [spof_dir.joinpath(fq_file['file_name'])
                   for fq_file in spof_res.json['fastq_files']]
    logger.debug("symlink FASTQ files for %s/%s", sample_id, flowcell_id)
    payload = {'cust_id': cust_id, 'case_id': case_id,
               'analysis_type': analysis_type, 'fastq_paths': fastq_paths,
               'demuxed_at': demuxed_at.strftime('%y%m%d')}
    symlink_res = glue.post('assemble', 'spofs', 'prepare', flowcell_id,
                            sample_id, data=payload)
    for fastq_path, symlink_path in symlink_res.json['symlinks']:
        fastq_file = path(fastq_path).basename()
        data = {'fastq_file': fastq_file, 'symlink': symlink_path,
                'flowcell_id': flowcell_id, 'reason': reason}
        glue.post('moneypenny', 'symlinks', data=data)
    return symlink_res.json
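# ``dateify`` and ``build_sampledir`` are used above but defined elsewhere.
# Minimal sketches under stated assumptions: ``dateify`` parses an ISO-like
# timestamp string into a datetime, and ``build_sampledir`` joins the demux
# root, demux folder, and a per-sample subfolder. The real helpers may expect a
# different timestamp format or directory layout.
def _dateify_sketch(timestamp):
    """Illustrative stand-in: parse 'YYYY-MM-DD HH:MM:SS' into a datetime."""
    from datetime import datetime  # stdlib; local import keeps the sketch self-contained
    return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')


def _build_sampledir_sketch(demux_root, demux_folder, sample_id):
    """Illustrative stand-in: path object for the per-sample FASTQ folder."""
    return path(demux_root).joinpath(demux_folder, sample_id)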
def post(demux_root, flowcell_id):
    """Add a new flowcell with reads to the database."""
    demux_res = glue.get('demux', 'flowcells', flowcell_id)
    # flatten samples across all basemasks on the flowcell
    samples = itertools.chain.from_iterable(
        basemask['samples'] for basemask in demux_res.json['basemasks'])
    flowcell_path = path(demux_res.json['directory'])
    flowcell_dir = flowcell_path.basename()
    logger.debug('add flowcell to database')
    add_res = glue.post('moneypenny', 'flowcells',
                        data={'flowcell_dir': flowcell_dir})
    for sample in samples:
        if not sample['sample_id'].startswith('lane'):
            post_spof(flowcell_id, sample['sample_id'], sample['reads'])
    return add_res.json
def post_sample(sample_id):
    """Add sample from LIMS to Moneypenny.

    Args:
        sample_id (str): globally unique sample id

    Returns:
        dict: serialized representation of the sample object
    """
    samples_res = glue.get('lims', 'samples', sample_id)
    cust_id = samples_res.json['customer']
    case_id = samples_res.json['family_id']
    data = dict(cust_id=cust_id, case_id=case_id)
    glue.post('moneypenny', 'cases', data=data)
    data['sample_id'] = sample_id
    sample_res = glue.post('moneypenny', 'samples', data=data)
    return sample_res.json
def post_report(upload_root, cust_id, case_id):
    """Write coverage report for a case."""
    logger.debug("determine where to write report")
    out_base = path(upload_root)
    upload_dir = out_base.joinpath(cust_id, case_id)
    upload_dir.makedirs_p()
    report_file = upload_dir.joinpath("{}.coverage.pdf".format(case_id))
    logger.debug("create dynamic gene panel")
    panel_res = glue.get("analyze", "virtual-panel", cust_id, case_id)
    panel_file = upload_dir.joinpath("{}.gene-panel.txt".format(case_id))
    with panel_file.open("w") as handle:
        handle.write("\n".join(panel_res.json["hgnc_symbols"]))
    report_payload = {
        "group_id": case_id,
        "panel": panel_res.json["hgnc_symbols"],
        "panel_name": panel_res.json["name"],
    }
    report_res = glue.post("chanjo", "report", data=report_payload)
    with report_file.open("wb") as handle:
        handle.write(report_res.json["pdf"])
    return report_file
def get_pedigree(cust_id, case_id):
    """Generate pedigree file content for a case."""
    logger.debug('generate pedigree file')
    ped_res = glue.get('lims', 'cases', 'pedigree', cust_id, case_id)
    return ped_res.json['content']
def get_track_analysis(cust_id, case_id):
    """Keep track of running/completed analyses."""
    status_res = glue.get('analyze', 'analyses', 'status', cust_id, case_id)
    status_res.json.update({'cust_id': cust_id, 'case_id': case_id})
    return status_res.json