Ejemplo n.º 1
0
Archivo: samples.py Proyecto: CGHQ/cghq
def get_sample_ready(sample_id):
    """Look up whether a sample has reached its target number of reads.

    The sample's ``target_reads`` value is read from the
    ``glue.get('lims', 'samples', ...)`` response and forwarded, as a string,
    to ``glue.get('moneypenny', 'samples', 'ready', ...)``.

    Args:
        sample_id (str): id of the sample to check

    Returns:
        the ``json`` payload of the Moneypenny "ready" response
    """
    lims_sample = glue.get('lims', 'samples', sample_id)
    reads_needed = str(lims_sample.json['target_reads'])
    moneypenny_res = glue.get('moneypenny', 'samples', 'ready', sample_id,
                              reads_needed)
    return moneypenny_res.json
Ejemplo n.º 2
0
def post_coverage(cust_id, case_id):
    """Post the coverage BED file of every sample in a case to chanjo.

    Args:
        cust_id (str): customer id
        case_id (str): case id; also sent as the chanjo ``group_id``

    Returns:
        list: the LIMS ``json`` payload of each sample that was posted
    """
    analysis = glue.get("analyze", "analyses", cust_id, case_id)
    bed_by_sample = analysis.json["chanjo_output"]

    loaded = []
    for sample_id, bed_file in iteritems(bed_by_sample):
        lims_sample = glue.get("lims", "samples", sample_id)
        # chanjo is keyed on the LIMS "name" field (the customer's sample id)
        payload = {
            "bed_file": bed_file,
            "sample_id": lims_sample.json["name"],
            "group_id": case_id,
        }
        glue.post("chanjo", "samples", data=payload)
        loaded.append(lims_sample.json)
    return loaded
Ejemplo n.º 3
0
Archivo: samples.py Proyecto: CGHQ/cghq
def get_sample_status(sample_id=None, project_id=None, sample_ids=None):
    """Check the status of a single sample, a list of samples and/or a project.

    Args:
        sample_id (str, optional): a single sample id; when given it takes
            precedence over ``sample_ids``
        project_id (str, optional): when given, the ids of the samples
            returned by ``glue.get('lims', 'samples', params=...)`` for this
            project are appended to the ids to look up
        sample_ids (iterable of str, optional): explicit sample ids

    Returns:
        list: one Moneypenny status payload per sample id, in lookup order;
            empty when no ids were supplied or found
    """
    if sample_id:
        lookup_ids = [sample_id]
    else:
        # ``sample_ids`` defaults to None; normalise that to "no explicit ids"
        # so the project-only and no-argument calls don't iterate over None
        lookup_ids = list(sample_ids or [])

    if project_id:
        proj_res = glue.get('lims', 'samples', params={'project_id': project_id})
        lookup_ids.extend(sample['id'] for sample in proj_res.json['samples'])

    return [glue.get('moneypenny', 'samples', 'status', lookup_id).json
            for lookup_id in lookup_ids]
Ejemplo n.º 4
0
Archivo: cases.py Proyecto: CGHQ/cghq
def get_case_ready(cust_id, case_id):
    """Check if all samples in a case are ready.

    The sample ids of the case according to Moneypenny (``lims_id`` field) are
    compared with the ids LIMS reports for the same customer/case before any
    sample is checked.

    Args:
        cust_id (str): customer id
        case_id (str): case id

    Returns:
        bool: True if ``get_sample_ready`` is truthy for every sample

    Raises:
        AttributeError: if the two sets of sample ids differ
    """
    case_res = glue.get('moneypenny', 'cases', cust_id, case_id)
    db_ids = {sample['lims_id'] for sample in case_res.json['samples']}

    samples_res = glue.get('lims', 'samples', params={'cust_id': cust_id,
                                                      'case_id': case_id})
    lims_ids = {sample['id'] for sample in samples_res.json['samples']}
    if db_ids != lims_ids:
        # sort so the logged ids come out in a reproducible order
        logger.warning("DB: %s, LIMS: %s", '|'.join(sorted(db_ids)),
                       '|'.join(sorted(lims_ids)))
        raise AttributeError('database vs LIMS samples are out of sync')

    # every sample is checked (no short-circuit) so each one gets logged
    statuses = []
    for sample_id in db_ids:
        logger.info('checking status of %s', sample_id)
        statuses.append(get_sample_ready(sample_id))

    return all(statuses)
Ejemplo n.º 5
0
def post_pedigree(cust_id, case_id):
    """Send the pedigree content of a case to the "assemble" prepare step.

    Args:
        cust_id (str): customer id
        case_id (str): case id

    Returns:
        the ``json`` payload of the
        ``glue.post('assemble', 'cases', 'prepare', ...)`` response
    """
    logger.debug('gather information from LIMS')
    lims_filters = {'cust_id': cust_id, 'case_id': case_id}
    lims_samples = glue.get('lims', 'samples', params=lims_filters)

    payload = {
        'analysis_type': utils.get_analysis_type(
            lims_samples.json['analysis_types']),
        'ped_content': get_pedigree(cust_id, case_id),
    }
    prepared = glue.post('assemble', 'cases', 'prepare', cust_id, case_id,
                         data=payload)
    return prepared.json
Ejemplo n.º 6
0
Archivo: scout.py Proyecto: CGHQ/cghq
def post_upload(upload_root, cust_id, case_id, update=False):
    """Process analysis and upload data to Scout.

    Args:
        upload_root (str): base directory; files are written under
            ``<upload_root>/<cust_id>/<case_id>``
        cust_id (str): customer id
        case_id (str): case id
        update (bool): upload even if Scout already returns data for the case

    Returns:
        the ``json`` payload of the
        ``glue.post('moneypenny', 'uploads', ...)`` response

    Raises:
        AnalysisNotCompleteError: if the analysis is not flagged ``is_complete``
        ScoutUploadConflictError: if the case already exists in Scout and
            ``update`` is false
    """
    logger.debug('build out folder structure')
    upload_dir = path(upload_root).joinpath(cust_id, case_id)
    upload_dir.makedirs_p()
    outped_file = upload_dir.joinpath('pedigree.madeline.tsv')

    logger.debug('retrive analysis data')
    analysis_res = glue.get('analyze', 'analyses', cust_id, case_id)
    if not analysis_res.json['is_complete']:
        # raise (not return) the error, like the conflict case below
        raise AnalysisNotCompleteError('case not yet analyzed')

    case_res = glue.get('scout', 'cases', cust_id, case_id)
    if case_res.json and not update:
        raise ScoutUploadConflictError('case already uploaded')

    logger.debug('prepare for Scout upload')
    post_coverage(cust_id, case_id)
    coverage_report = post_report(upload_root, cust_id, case_id)

    logger.debug('run madeline')
    mad_res = glue.post('analyze', 'madeline', 'run', cust_id, case_id,
                        data={'out_ped': outped_file})

    logger.debug('write scout config file')
    ped_svg = mad_res.json['output']
    # post_config takes the upload root as its first argument
    scout_config = post_config(upload_root, cust_id, case_id, coverage_report,
                               ped_svg)

    logger.debug('load data into scout')
    glue.post('scout', 'cases', data={'scout_config': scout_config})

    logger.debug('load the updated case from scout')
    newcase_res = glue.get('scout', 'cases', cust_id, case_id)

    add_payload = {'cust_id': cust_id, 'case_id': case_id,
                   'uploaded_at': newcase_res.json['created_at']}
    add_res = glue.post('moneypenny', 'uploads', data=add_payload)
    # ``json`` is read as an attribute on every other glue response
    return add_res.json
Ejemplo n.º 7
0
def post_concordance(sample_id):
    """Store the top genotype match of a sample in Moneypenny.

    Args:
        sample_id (str): id of the sample to match

    Returns:
        the ``json`` payload of the
        ``glue.post('moneypenny', 'genotypings', ...)`` response
    """
    lims_sample = glue.get('lims', 'samples', sample_id)
    cust_id = lims_sample.json['customer']
    case_id = lims_sample.json['family_id']

    analysis = glue.get('analyze', 'analyses', cust_id, case_id)
    started_at = analysis.json['analyzed_at']

    logger.debug('match sample genotypes')
    matching = glue.get('genotype', 'samples', 'match', sample_id)
    # only the first entry of "results" is persisted
    best = matching.json['results'][0]

    logger.debug('persist the comparison result')
    record = {
        'sample_id': sample_id,
        'started_at': started_at,
        'matches': best['match'],
        # mismatch/unknown counts are optional in a result and default to 0
        'mismatches': best.get('mismatch', 0),
        'unknowns': best.get('unknown', 0),
        'is_failed': matching.json.get('is_failed'),
    }
    return glue.post('moneypenny', 'genotypings', data=record).json
Ejemplo n.º 8
0
def post_genotypes(cust_id, case_id):
    """Post the ``ready_vcf`` of a case's analysis to the genotype service.

    Args:
        cust_id (str): customer id
        case_id (str): case id

    Returns:
        the ``json`` payload of the ``glue.post('genotype', 'samples', ...)``
        response

    Raises:
        AnalysisNotCompleteError: if the analysis is not flagged ``is_complete``
    """
    analysis = glue.get('analyze', 'analyses', cust_id, case_id)

    if not analysis.json['is_complete']:
        logger.error('analysis not complete')
        raise AnalysisNotCompleteError

    logger.debug('loading genotypes...')
    vcf_payload = {'type': 'vcf', 'file': analysis.json['ready_vcf']}
    return glue.post('genotype', 'samples', data=vcf_payload).json
Ejemplo n.º 9
0
Archivo: spofs.py Proyecto: CGHQ/cghq
def post_spof(flowcell_id, sample_id, reads):
    """Register a sample/flowcell combination ("spof") in Moneypenny.

    Args:
        flowcell_id (str): flowcell id
        sample_id (str): sample id
        reads: read count, stored as ``read_count``

    Returns:
        the ``json`` payload of the ``glue.post('moneypenny', 'spofs', ...)``
        response
    """
    logger.debug('fetch related FASTQ files')
    demux_spof = glue.get('demux', 'spofs', flowcell_id, sample_id)

    logger.debug('add spof to database')
    record = dict(
        read_count=reads,
        fastq_files=demux_spof.json['fastq_files'],
        sample_id=sample_id,
        flowcell_id=flowcell_id,
    )
    return glue.post('moneypenny', 'spofs', data=record).json
Ejemplo n.º 10
0
Archivo: scout.py Proyecto: CGHQ/cghq
def post_config(upload_root, cust_id, case_id, coverage_report, ped_svg):
    """Write the Scout config file for a case.

    The config fetched with ``glue.get('analyze', 'analyses', 'scout', ...)``
    is extended with absolute paths to the coverage report and the madeline
    output and written as an INI file in the case's upload directory.

    Args:
        upload_root (str): base directory; the file ends up under
            ``<upload_root>/<cust_id>/<case_id>``
        cust_id (str): customer id
        case_id (str): case id
        coverage_report (str): path to the coverage report
        ped_svg (str): path to the madeline output

    Returns:
        path to the written ``scout-config-clinical.ini`` file
    """
    config_res = glue.get('analyze', 'analyses', 'scout', cust_id, case_id)
    extra_files = (('coverage_report', coverage_report), ('madeline', ped_svg))
    for key, file_path in extra_files:
        config_res.json[key] = os.path.abspath(file_path)

    logger.debug('figure out where to write config')
    case_dir = path(upload_root).joinpath(cust_id, case_id)
    case_dir.makedirs_p()
    ini_path = case_dir.joinpath('scout-config-clinical.ini')
    write_ini(ini_path, config_res.json)
    return ini_path
Ejemplo n.º 11
0
def post_start_analysis(cust_id, case_id, gene_list=None, mip_config=None,
                        add_only=False):
    """Start (or just register) an analysis for a case.

    Args:
        cust_id (str): customer id
        case_id (str): case id
        gene_list: forwarded as ``gene_list`` when starting the analysis
        mip_config: forwarded as ``mip_config`` when starting the analysis
        add_only (bool): don't start anything; fetch the existing analysis
            and only persist it to Moneypenny

    Returns:
        the ``json`` payload of the
        ``glue.post('moneypenny', 'analyses', ...)`` response
    """
    lims_samples = glue.get('lims', 'samples',
                            params={'cust_id': cust_id, 'case_id': case_id})
    analysis_type = get_analysis_type(lims_samples.json['analysis_types'])

    if not add_only:
        run_options = {'seq_type': analysis_type, 'gene_list': gene_list,
                       'mip_config': mip_config}
        analysis = glue.post('analyze', 'analyses', 'start', cust_id, case_id,
                             data=run_options)
    else:
        analysis = glue.get('analyze', 'analyses', cust_id, case_id)

    logger.debug('persist the analysis to the database')
    record = {
        'cust_id': cust_id,
        'case_id': case_id,
        'analysis_type': analysis_type,
        'analyzed_at': analysis.json['analyzed_at'],
    }
    return glue.post('moneypenny', 'analyses', data=record).json
Ejemplo n.º 12
0
Archivo: spofs.py Proyecto: CGHQ/cghq
def post_symlink_spof(demux_root, flowcell_id, sample_id, reason='analysis'):
    """Symlink the FASTQ files of a sample on a flowcell and record the links.

    Args:
        demux_root (str): root path to demux folder
        flowcell_id (str): flowcell id
        sample_id (str): globally unique sample id
        reason (str): stored with every symlink record in Moneypenny

    Returns:
        the ``json`` payload of the
        ``glue.post('assemble', 'spofs', 'prepare', ...)`` response
    """
    logger.debug('gather information from LIMS')
    lims_sample = glue.get('lims', 'samples', sample_id)
    analysis_type = lims_sample.json['analysis_type']
    case_id = lims_sample.json['family_id']
    cust_id = lims_sample.json['customer']

    logger.debug('gather information from status db')
    spof = glue.get('moneypenny', 'spofs', flowcell_id, sample_id)
    demux_folder = spof.json['flowcell']['demux_folder']
    demux_date = dateify(spof.json['flowcell']['demuxed_at']).date()
    sample_dir = build_sampledir(demux_root, demux_folder, sample_id)
    fastq_paths = []
    for fastq in spof.json['fastq_files']:
        fastq_paths.append(sample_dir.joinpath(fastq['file_name']))

    logger.debug("symlink FASTQ files for %s/%s", sample_id, flowcell_id)
    prepare_payload = {
        'cust_id': cust_id,
        'case_id': case_id,
        'analysis_type': analysis_type,
        'fastq_paths': fastq_paths,
        'demuxed_at': demux_date.strftime('%y%m%d'),
    }
    prepared = glue.post('assemble', 'spofs', 'prepare', flowcell_id,
                         sample_id, data=prepare_payload)

    # record every (FASTQ file, symlink) pair that was reported back
    for source_path, link_path in prepared.json['symlinks']:
        link_record = {
            'fastq_file': path(source_path).basename(),
            'symlink': link_path,
            'flowcell_id': flowcell_id,
            'reason': reason,
        }
        glue.post('moneypenny', 'symlinks', data=link_record)

    return prepared.json
Ejemplo n.º 13
0
def post(demux_root, flowcell_id):
    """Register a flowcell and the reads of its samples in Moneypenny.

    Args:
        demux_root (str): not used by this function
        flowcell_id (str): flowcell id

    Returns:
        the ``json`` payload of the
        ``glue.post('moneypenny', 'flowcells', ...)`` response
    """
    demux_res = glue.get('demux', 'flowcells', flowcell_id)
    basemasks = demux_res.json['basemasks']
    flowcell_dir = path(demux_res.json['directory']).basename()

    logger.debug('add flowcell to database')
    flowcell_res = glue.post('moneypenny', 'flowcells',
                             data={'flowcell_dir': flowcell_dir})

    # samples are listed per basemask; walk all of them in order
    for basemask in basemasks:
        for sample in basemask['samples']:
            # entries whose id starts with "lane" are skipped
            if sample['sample_id'].startswith('lane'):
                continue
            post_spof(flowcell_id, sample['sample_id'], sample['reads'])
    return flowcell_res.json
Ejemplo n.º 14
0
Archivo: samples.py Proyecto: CGHQ/cghq
def post_sample(sample_id):
    """Add a sample from LIMS, and the case it belongs to, to Moneypenny.

    Args:
        sample_id (str): id of the sample to add

    Returns:
        the ``json`` payload of the
        ``glue.post('moneypenny', 'samples', ...)`` response
    """
    lims_sample = glue.get('lims', 'samples', sample_id)
    case_payload = {
        'cust_id': lims_sample.json['customer'],
        'case_id': lims_sample.json['family_id'],
    }

    # the case is posted before the sample that refers to it
    glue.post('moneypenny', 'cases', data=case_payload)
    sample_payload = dict(case_payload, sample_id=sample_id)
    return glue.post('moneypenny', 'samples', data=sample_payload).json
Ejemplo n.º 15
0
def post_report(upload_root, cust_id, case_id):
    """Write coverage report for a case.

    A gene panel file listing the HGNC symbols of the case's virtual panel is
    written next to the report.

    Args:
        upload_root (str): base directory; files are written under
            ``<upload_root>/<cust_id>/<case_id>``
        cust_id (str): customer id
        case_id (str): case id; also sent as the chanjo ``group_id``

    Returns:
        path to the written ``<case_id>.coverage.pdf`` file
    """
    logger.debug("determine where to write report")
    upload_dir = path(upload_root).joinpath(cust_id, case_id)
    upload_dir.makedirs_p()
    report_file = upload_dir.joinpath("{}.coverage.pdf".format(case_id))

    logger.debug("create dynamic gene panel")
    panel_res = glue.get("analyze", "virtual-panel", cust_id, case_id)
    hgnc_symbols = panel_res.json["hgnc_symbols"]
    panel_file = upload_dir.joinpath("{}.gene-panel.txt".format(case_id))
    with panel_file.open("w") as handle:
        handle.write("\n".join(hgnc_symbols))

    report_payload = {
        "group_id": case_id,
        "panel": hgnc_symbols,
        "panel_name": panel_res.json["name"],
    }
    report_res = glue.post("chanjo", "report", data=report_payload)

    # ``json`` is read as an attribute on glue responses (as for ``panel_res``
    # above), not called
    # NOTE(review): the file is opened in binary mode -- confirm "pdf" holds
    # bytes rather than text
    with report_file.open("wb") as handle:
        handle.write(report_res.json["pdf"])
    return report_file
Ejemplo n.º 16
0
def get_pedigree(cust_id, case_id):
    """Fetch the pedigree content for a case from LIMS.

    Args:
        cust_id (str): customer id
        case_id (str): case id

    Returns:
        the ``content`` field of the
        ``glue.get('lims', 'cases', 'pedigree', ...)`` response
    """
    logger.debug('generate pedigree file')
    pedigree = glue.get('lims', 'cases', 'pedigree', cust_id, case_id).json
    return pedigree['content']
Ejemplo n.º 17
0
def get_track_analysis(cust_id, case_id):
    """Fetch the status of a case's analysis, tagged with its identifiers.

    Args:
        cust_id (str): customer id
        case_id (str): case id

    Returns:
        the ``json`` payload of the
        ``glue.get('analyze', 'analyses', 'status', ...)`` response with
        ``cust_id`` and ``case_id`` added to it
    """
    tracked = glue.get('analyze', 'analyses', 'status', cust_id, case_id)
    # annotate the payload in place so callers know which case it refers to
    tracked.json.update(cust_id=cust_id, case_id=case_id)
    return tracked.json