예제 #1
0
def get_selection(session, filter_type=None, sample_ids=None):
    """Stream selection-pressure records as TSV output.

    This is a generator: it first yields the result of the writer's
    ``writeheader()`` and then one ``writerow()`` result per
    ``SelectionPressure`` record returned by the query.

    Args:
        session: Object whose ``query()`` is used to build the
            ``SelectionPressure`` query.
        filter_type: ``'overall'`` keeps only records whose ``sample_id``
            is NULL; ``'samples'`` keeps only per-sample records; any other
            value (including ``None``) applies no filter.
        sample_ids: Only consulted when ``filter_type == 'samples'``.  When
            truthy, records are restricted to these sample ids; otherwise
            every record with a non-NULL ``sample_id`` is kept.

    Yields:
        Whatever ``StreamingTSV.writeheader()`` / ``writerow()`` return.
    """
    query = session.query(SelectionPressure).options(
        joinedload(SelectionPressure.clone),
        joinedload(SelectionPressure.sample),
    )
    if filter_type == 'overall':
        query = query.filter(SelectionPressure.sample_id.is_(None))
    elif filter_type == 'samples':
        if sample_ids:
            # Query.filter() returns a new query instead of mutating the
            # receiver, so the result must be re-assigned; otherwise the
            # sample restriction is silently dropped.
            query = query.filter(
                SelectionPressure.sample_id.in_(sample_ids)
            )
        else:
            query = query.filter(~SelectionPressure.sample_id.is_(None))

    # Every table column except the two identifiers is exported verbatim.
    # A new list is built so this does not rely on keys() returning a
    # mutable list.
    base_fields = [
        field for field in SelectionPressure.__table__.c.keys()
        if field not in ('id', 'sample_id')
    ]

    writer = StreamingTSV(['sample', 'subject'] + base_fields)
    yield writer.writeheader()

    for sel in yield_limit(query, SelectionPressure.id):
        row = {f: getattr(sel, f) for f in base_fields}
        # Records without a sample are labelled 'All Samples'.
        row['sample'] = sel.sample.name if sel.sample else 'All Samples'
        row['subject'] = sel.clone.subject.identifier
        yield writer.writerow(row)
def get_sequences(session, sample, fmt, clones_only, min_subject_copies):
    """Stream the sequences of one sample through a ``SequenceWriter``.

    This is a generator: it yields the writer's ``writeheader()`` result
    and then one ``writeseq()`` result per matching ``Sequence``.

    Args:
        session: Object whose ``query()`` is used to build the queries.
        sample: Object whose ``id`` selects the sequences to export.
        fmt: Passed as the first argument to ``SequenceWriter``.
        clones_only: When truthy, only sequences with a non-NULL
            ``clone_id`` are included.
        min_subject_copies: When truthy, only sequences whose collapse has
            ``copy_number_in_subject`` at or above this value are included.
    """
    # All metadata keys in the database, handed to the writer.
    meta_keys = {entry.key for entry in session.query(SampleMetadata.key)}

    eager_loads = (
        joinedload(Sequence.clone),
        joinedload(Sequence.collapse),
        joinedload(Sequence.sample),
        joinedload(Sequence.subject),
    )
    selected = session.query(Sequence)
    selected = selected.filter(Sequence.sample_id == sample.id)
    selected = selected.join(SequenceCollapse)
    selected = selected.options(*eager_loads)

    if clones_only:
        selected = selected.filter(~Sequence.clone_id.is_(None))
    if min_subject_copies:
        copy_threshold = (
            SequenceCollapse.copy_number_in_subject >= min_subject_copies
        )
        selected = selected.filter(copy_threshold)

    writer = SequenceWriter(fmt, meta_keys)

    yield writer.writeheader()
    for record in yield_limit(selected, Sequence.ai):
        yield writer.writeseq(record)
예제 #3
0
def get_sequences(session, seqs, format_name):
    """Stream an already-built sequence query through a ``SequenceWriter``.

    This is a generator: it yields the writer's ``writeheader()`` result
    and then one ``writeseq()`` result per row of the query.

    Args:
        session: Object whose ``query()`` is used to collect the
            ``SampleMetadata`` keys.
        seqs: Query-like object supporting ``join()`` and ``options()``;
            it is joined to ``SequenceCollapse`` before iteration.
        format_name: Passed as the first argument to ``SequenceWriter``.
    """
    eager_loads = (
        joinedload(Sequence.clone),
        joinedload(Sequence.collapse),
        joinedload(Sequence.sample),
        joinedload(Sequence.subject),
    )
    prepared = seqs.join(SequenceCollapse).options(*eager_loads)

    # All metadata keys in the database, handed to the writer.
    meta_keys = {entry.key for entry in session.query(SampleMetadata.key)}
    writer = SequenceWriter(format_name, meta_keys)

    yield writer.writeheader()
    for record in yield_limit(prepared, Sequence.ai):
        yield writer.writeseq(record)
예제 #4
0
def get_clone_summary(session, include_lineages):
    """Stream a one-row-per-clone summary as TSV output.

    This is a generator: it yields the writer's ``writeheader()`` result
    and then one ``writerow()`` result per ``Clone``.

    Args:
        session: Object whose ``query()`` is used to fetch ``Clone`` rows.
        include_lineages: When truthy, a ``lineage`` column holding
            ``clone.tree`` is appended to the output.
    """
    columns = [
        'clone_id', 'subject', 'v_gene', 'j_gene', 'functional', 'insertions',
        'deletions', 'cdr3_nt', 'cdr3_num_nt', 'cdr3_aa', 'uniques',
        'instances', 'copies', 'germline', 'parent_id',
        'avg_mutations_per_copy'
    ]
    if include_lineages:
        columns.append('lineage')
    writer = StreamingTSV(columns)

    yield writer.writeheader()

    # Sentinel used to tell "attribute missing" apart from any real value.
    absent = object()
    for clone in yield_limit(session.query(Clone), Clone.id):
        record = {}
        # Copy every column that exists as an attribute on the clone;
        # columns without a matching attribute are skipped here and filled
        # in (or overridden) by the explicit assignments below.
        for name in writer.fieldnames:
            value = getattr(clone, name, absent)
            if value is not absent:
                record[name] = value

        record['clone_id'] = clone.id
        record['subject'] = clone.subject.identifier
        record['functional'] = 'T' if clone.functional else 'F'
        record['insertions'] = clone._insertions
        record['deletions'] = clone._deletions
        record['uniques'] = clone.overall_unique_cnt
        record['instances'] = clone.overall_instance_cnt
        record['copies'] = clone.overall_total_cnt
        record['avg_mutations_per_copy'] = round(
            clone.overall_stats.total_mutations(normalize=True), 2
        )

        if include_lineages:
            record['lineage'] = clone.tree
        yield writer.writerow(record)
예제 #5
0
def get_clone_overlap(session):
    """Stream per-sample clone statistics as TSV output.

    This is a generator: it yields the writer's ``writeheader()`` result
    and then one ``writerow()`` result per ``CloneStats`` record that has
    a non-NULL ``sample_id``.

    Args:
        session: Object whose ``query()`` is used to fetch ``CloneStats``.
    """
    columns = [
        'clone_id', 'sample', 'uniques', 'copies', 'avg_mutations_per_copy'
    ]
    writer = StreamingTSV(columns)

    # Only records tied to a sample are exported.
    per_sample = session.query(CloneStats).filter(
        ~CloneStats.sample_id.is_(None)
    )

    yield writer.writeheader()
    for stat in yield_limit(per_sample, CloneStats.id):
        record = {}
        record['clone_id'] = stat.clone_id
        record['sample'] = stat.sample.name
        record['uniques'] = stat.unique_cnt
        record['copies'] = stat.total_cnt
        record['avg_mutations_per_copy'] = round(
            stat.total_mutations(normalize=True), 2
        )
        yield writer.writerow(record)
예제 #6
0
def get_sequences(session, sample, fmt, clones_only, min_subject_copies):
    """Export the sequences belonging to ``sample`` in the given format.

    This is a generator: the first item is the writer's ``writeheader()``
    result, followed by one ``writeseq()`` result per matching
    ``Sequence``.

    Args:
        session: Object whose ``query()`` is used to build the queries.
        sample: Object whose ``id`` selects the sequences to export.
        fmt: Passed as the first argument to ``SequenceWriter``.
        clones_only: When truthy, sequences with a NULL ``clone_id`` are
            excluded.
        min_subject_copies: When truthy, sequences whose collapse has a
            ``copy_number_in_subject`` below this value are excluded.
    """
    # All metadata keys in the database, handed to the writer.
    meta_keys = {row.key for row in session.query(SampleMetadata.key)}

    in_sample = Sequence.sample_id == sample.id
    query = session.query(Sequence).filter(in_sample)
    query = query.join(SequenceCollapse)
    query = query.options(
        joinedload(Sequence.clone),
        joinedload(Sequence.collapse),
        joinedload(Sequence.sample),
        joinedload(Sequence.subject),
    )

    if clones_only:
        has_clone = ~Sequence.clone_id.is_(None)
        query = query.filter(has_clone)
    if min_subject_copies:
        query = query.filter(
            SequenceCollapse.copy_number_in_subject >= min_subject_copies
        )

    writer = SequenceWriter(fmt, meta_keys)

    yield writer.writeheader()
    for item in yield_limit(query, Sequence.ai):
        yield writer.writeseq(item)