Ejemplo n.º 1
0
def add_sequences_from_sample(session, sample, sequences, props):
    """Write re-aligned sequences for ``sample`` back to the database.

    Every item of ``sequences`` is a mapping with the keys ``'alignment'``,
    ``'r_type'`` and ``'pk'``.  Items whose alignment fails
    ``props.validate`` are skipped.  For ``'NoResult'`` items the alignment
    is inserted with ``add_sequences`` and the ``NoResult`` row with that
    ``pk`` is deleted; for ``'Sequence'`` items the ``Sequence`` row whose
    ``ai`` equals ``pk`` is updated in place.  A ``ValueError`` raised while
    handling an item causes that item to be skipped.
    """
    logger.info('Adding {} corrected sequences to sample {}'.format(
        len(sequences), sample.id))
    for record in periodic_commit(session, sequences):
        aln = record['alignment']

        # A failed validation (or a ValueError raised by it) drops the item.
        try:
            props.validate(aln)
        except (AlignmentException, ValueError):
            continue

        try:
            kind = record['r_type']
            if kind == 'NoResult':
                # Promote the former no-result to a real sequence, then
                # remove the placeholder row it replaces.
                add_sequences(session, [aln], sample, error_action='raise')
                session.query(NoResult).filter(
                    NoResult.pk == record['pk']
                ).delete(synchronize_session=False)
            elif kind == 'Sequence':
                new_values = {
                    'partial': aln.partial,
                    'probable_indel_or_misalign': aln.has_possible_indel,
                    'v_gene': format_ties(aln.v_gene),
                    'j_gene': format_ties(aln.j_gene),
                    'num_gaps': aln.num_gaps,
                    'seq_start': aln.seq_start,
                    'v_match': aln.v_match,
                    'v_length': aln.v_length,
                    'j_match': aln.j_match,
                    'j_length': aln.j_length,
                    'removed_prefix': aln.sequence.removed_prefix_sequence,
                    'removed_prefix_qual': aln.sequence.removed_prefix_quality,
                    'v_mutation_fraction': aln.v_mutation_fraction,
                    'pre_cdr3_length': aln.pre_cdr3_length,
                    'pre_cdr3_match': aln.pre_cdr3_match,
                    'post_cdr3_length': aln.post_cdr3_length,
                    'post_cdr3_match': aln.post_cdr3_match,
                    'in_frame': aln.in_frame,
                    'functional': aln.functional,
                    'stop': aln.stop,
                    'cdr3_nt': aln.cdr3,
                    'cdr3_num_nts': len(aln.cdr3),
                    'cdr3_aa': lookups.aas_from_nts(aln.cdr3),
                    'sequence': str(aln.sequence.sequence),
                    'quality': aln.sequence.quality,
                    'locally_aligned': aln.locally_aligned,
                    '_insertions': serialize_gaps(aln.insertions),
                    '_deletions': serialize_gaps(aln.deletions),
                    'germline': aln.germline
                }
                # Instantiating the model is only done to validate the field
                # values; the instance is discarded, never added to the
                # session.
                Sequence(**new_values)

                session.query(Sequence).filter(
                    Sequence.ai == record['pk']
                ).update(new_values, synchronize_session=False)
        except ValueError:
            continue
Ejemplo n.º 2
0
def add_results(uniques, sample, session):
    """Store collapsed alignments for ``sample`` and record V-tie averages.

    :param uniques: iterable of iterables of alignments; they are flattened
        and each alignment is passed to ``add_sequences`` on its own
    :param sample: sample the sequences belong to; on success its
        ``v_ties_len`` and ``v_ties_mutations`` are set to the mean
        ``v_length`` and ``v_mutation_fraction`` of the stored alignments
    :param session: database session; committed once at the end

    An alignment for which ``add_sequences`` raises ``AlignmentException``
    is recorded through ``add_noresults_for_vdj`` and excluded from the
    averages.  When nothing was stored the sample's averages are left
    untouched.
    """
    lens = []
    muts = []
    # from_iterable flattens lazily instead of unpacking every group into
    # call arguments up front.
    for unique in itertools.chain.from_iterable(uniques):
        try:
            add_sequences(session, [unique], sample)
            lens.append(unique.v_length)
            muts.append(unique.v_mutation_fraction)
        except AlignmentException as e:
            add_noresults_for_vdj(session, unique.sequence, sample, str(e))

    if lens:
        # float() keeps the mean from being truncated by integer division
        # when the lengths are ints (Python 2 semantics).
        sample.v_ties_len = sum(lens) / float(len(lens))
        sample.v_ties_mutations = sum(muts) / float(len(muts))
    session.commit()
def read_file(session, fmt, handle, sample, v_germlines, j_germlines, props):
    """Import aligned sequences from a tab-delimited file into ``sample``.

    Each row becomes a ``VDJSequence`` built from its ``SEQUENCE_ID`` and
    ``SEQUENCE_IMGT`` columns (``.`` gaps rewritten as ``-``) and is aligned
    with ``create_alignment``.  Alignments whose sequences compare equal
    under ``dnautils.equal`` are collapsed into a single entry with summed
    copy numbers.  The collapsed alignments are then checked with
    ``props.validate`` and stored through ``add_sequences``.  Anything that
    raises ``AlignmentException`` along the way is recorded through
    ``add_noresults_for_vdj`` instead.

    :param session: database session; committed once at the end
    :param fmt: input format; rows of the ``'adaptive'`` format are first
        converted with ``extract_adaptive_sequence``
    :param handle: open file-like object read by ``csv.DictReader``
    :param sample: sample receiving the sequences; its ``v_ties_len`` and
        ``v_ties_mutations`` are set to the mean ``v_length`` and
        ``v_mutation_fraction`` of the stored alignments, if there are any
    :param v_germlines: V germlines forwarded to the alignment helpers
    :param j_germlines: J germlines forwarded to the alignment helpers
    :param props: object whose ``validate`` method accepts or rejects an
        alignment
    """
    reader = csv.DictReader(handle, delimiter='\t')
    # Alignments bucketed by sequence length, so only same-length sequences
    # are ever compared for equality.
    by_length = {}

    for i, line in enumerate(reader):
        if fmt == 'adaptive':
            try:
                line = extract_adaptive_sequence(i, line, v_germlines,
                                                 j_germlines)
            except (AlignmentException, KeyError) as e:
                # The row could not be converted, so there is no usable ID
                # or sequence; record a placeholder named after the row.
                placeholder = VDJSequence('seq_{}'.format(i), '')
                add_noresults_for_vdj(session, placeholder, sample, str(e))
                continue
        seq = VDJSequence(line['SEQUENCE_ID'],
                          line['SEQUENCE_IMGT'].replace('.', '-'))
        if 'DUPCOUNT' in line:
            seq.copy_number = int(line['DUPCOUNT'])
        try:
            alignment = create_alignment(seq, line, v_germlines, j_germlines)
            bucket = by_length.setdefault(
                len(alignment.sequence.sequence), [])
            for other in bucket:
                if dnautils.equal(other.sequence.sequence,
                                  alignment.sequence.sequence):
                    other.sequence.copy_number += (
                        alignment.sequence.copy_number)
                    break
            else:
                # No equal sequence seen yet: this one is a new unique.
                bucket.append(alignment)
        except AlignmentException as e:
            add_noresults_for_vdj(session, seq, sample, str(e))

    lens = []
    muts = []
    # Process uniques in order of increasing sequence length.
    for length in sorted(by_length):
        for unique in by_length[length]:
            try:
                props.validate(unique)
                add_sequences(session, [unique], sample)
                lens.append(unique.v_length)
                muts.append(unique.v_mutation_fraction)
            except AlignmentException as e:
                # Report the sequence that actually failed, not whichever
                # row happened to be read last in the loop above.
                add_noresults_for_vdj(session, unique.sequence, sample,
                                      str(e))

    if lens:
        # float() keeps the mean from being truncated by integer division
        # when the lengths are ints (Python 2 semantics).
        sample.v_ties_len = sum(lens) / float(len(lens))
        sample.v_ties_mutations = sum(muts) / float(len(muts))

    session.commit()
Ejemplo n.º 4
0
def read_file(session, fmt, handle, sample, v_germlines, j_germlines, props):
    """Import aligned sequences from a tab-delimited file into ``sample``.

    Each row becomes a ``VDJSequence`` built from its ``SEQUENCE_ID`` and
    ``SEQUENCE_IMGT`` columns (``.`` gaps rewritten as ``-``) and is aligned
    with ``create_alignment``.  Alignments whose sequences compare equal
    under ``dnautils.equal`` are collapsed into a single entry with summed
    copy numbers.  The collapsed alignments are then checked with
    ``props.validate`` and stored through ``add_sequences``.  Anything that
    raises ``AlignmentException`` along the way is recorded through
    ``add_noresults_for_vdj`` instead.

    :param session: database session; committed once at the end
    :param fmt: input format; rows of the ``'adaptive'`` format are first
        converted with ``extract_adaptive_sequence``
    :param handle: open file-like object read by ``csv.DictReader``
    :param sample: sample receiving the sequences; its ``v_ties_len`` and
        ``v_ties_mutations`` are set to the mean ``v_length`` and
        ``v_mutation_fraction`` of the stored alignments, if there are any
    :param v_germlines: V germlines forwarded to the alignment helpers
    :param j_germlines: J germlines forwarded to the alignment helpers
    :param props: object whose ``validate`` method accepts or rejects an
        alignment
    """
    reader = csv.DictReader(handle, delimiter='\t')
    # Alignments bucketed by sequence length, so only same-length sequences
    # are ever compared for equality.
    uniques_by_len = {}

    for i, line in enumerate(reader):
        if fmt == 'adaptive':
            try:
                line = extract_adaptive_sequence(i, line, v_germlines,
                                                 j_germlines)
            except (AlignmentException, KeyError) as e:
                # The row could not be converted, so there is no usable ID
                # or sequence; record a placeholder named after the row.
                unparsed = VDJSequence('seq_{}'.format(i), '')
                add_noresults_for_vdj(session, unparsed, sample, str(e))
                continue
        seq = VDJSequence(line['SEQUENCE_ID'],
                          line['SEQUENCE_IMGT'].replace('.', '-'))
        if 'DUPCOUNT' in line:
            seq.copy_number = int(line['DUPCOUNT'])
        try:
            alignment = create_alignment(seq, line, v_germlines, j_germlines)
            same_len = uniques_by_len.setdefault(
                len(alignment.sequence.sequence), [])
            for other in same_len:
                if dnautils.equal(other.sequence.sequence,
                                  alignment.sequence.sequence):
                    other.sequence.copy_number += (
                        alignment.sequence.copy_number)
                    break
            else:
                # No equal sequence seen yet: this one is a new unique.
                same_len.append(alignment)
        except AlignmentException as e:
            add_noresults_for_vdj(session, seq, sample, str(e))

    lens = []
    muts = []
    # Process uniques in order of increasing sequence length.
    for length in sorted(uniques_by_len):
        for unique in uniques_by_len[length]:
            try:
                props.validate(unique)
                add_sequences(session, [unique], sample)
                lens.append(unique.v_length)
                muts.append(unique.v_mutation_fraction)
            except AlignmentException as e:
                # Report the sequence that actually failed, not whichever
                # row happened to be read last in the loop above.
                add_noresults_for_vdj(session, unique.sequence, sample,
                                      str(e))

    if lens:
        sample.v_ties_len = sum(lens) / float(len(lens))
        sample.v_ties_mutations = sum(muts) / float(len(muts))

    session.commit()
def aggregate_collapse(aggregate_queue, db_config, sample_id, props,
                       batch_size=1000):
    """Persist collapsed alignments for one sample in committed batches.

    :param aggregate_queue: iterable whose items are themselves iterables
        of sequences to store
    :param db_config: database configuration passed to ``config.init_db``
    :param sample_id: ID of the ``Sample`` row the sequences belong to
    :param props: settings object; alleles are stripped unless
        ``props.genotyping`` is truthy
    :param batch_size: number of sequences accumulated before they are
        handed to ``add_sequences`` and committed (default 1000)

    The session opened here is always closed, including when storing or
    committing a batch raises.
    """
    session = config.init_db(db_config, create=False)
    try:
        sample = session.query(Sample).filter(Sample.id == sample_id).one()
        pending = []
        for alignment in aggregate_queue:
            for seq in alignment:
                pending.append(seq)
                if len(pending) >= batch_size:
                    add_sequences(session, pending, sample,
                                  strip_alleles=not props.genotyping)
                    pending = []
                    session.commit()
        # Flush whatever is left over from the last, partial batch.
        if pending:
            add_sequences(session, pending, sample,
                          strip_alleles=not props.genotyping)
        logger.info('Finished aggregating sequences')
        session.commit()
    finally:
        session.close()
Ejemplo n.º 6
0
def aggregate_collapse(aggregate_queue, db_config, sample_id, props,
                       batch_size=1000):
    """Persist collapsed alignments for one sample in committed batches.

    :param aggregate_queue: iterable whose items are themselves iterables
        of sequences to store
    :param db_config: database configuration passed to ``config.init_db``
    :param sample_id: ID of the ``Sample`` row the sequences belong to
    :param props: settings object; alleles are stripped unless
        ``props.genotyping`` is truthy
    :param batch_size: number of sequences accumulated before they are
        handed to ``add_sequences`` and committed (default 1000)

    The session opened here is always closed, including when storing or
    committing a batch raises.
    """
    session = config.init_db(db_config, create=False)
    try:
        sample = session.query(Sample).filter(Sample.id == sample_id).one()
        batch = []
        for alignment in aggregate_queue:
            for seq in alignment:
                batch.append(seq)
                if len(batch) >= batch_size:
                    add_sequences(session, batch, sample,
                                  strip_alleles=not props.genotyping)
                    batch = []
                    session.commit()
        # Flush whatever is left over from the last, partial batch.
        if batch:
            add_sequences(session, batch, sample,
                          strip_alleles=not props.genotyping)
        logger.info('Finished aggregating sequences')
        session.commit()
    finally:
        session.close()
Ejemplo n.º 7
0
def add_sequences_from_sample(session, sample, sequences, props):
    """Apply corrected alignments of ``sample`` to the database.

    ``sequences`` holds mappings with an ``'alignment'``, an ``'r_type'``
    and a ``'pk'`` entry.  An entry is skipped when ``props.validate``
    rejects its alignment or when a ``ValueError`` is raised while it is
    processed.  ``'NoResult'`` entries are inserted via ``add_sequences``
    and their ``NoResult`` row is deleted; ``'Sequence'`` entries have the
    matching ``Sequence`` row (``ai == pk``) updated with the new values.
    """
    logger.info('Adding {} corrected sequences to sample {}'.format(
        len(sequences), sample.id))
    for entry in periodic_commit(session, sequences):
        hit = entry['alignment']

        # Rejected alignments are dropped without touching the database.
        try:
            props.validate(hit)
        except (AlignmentException, ValueError):
            continue

        try:
            r_type = entry['r_type']

            if r_type == 'NoResult':
                add_sequences(session, [hit], sample, error_action='raise')
                # The placeholder row is obsolete once the sequence exists.
                obsolete = session.query(NoResult).filter(
                    NoResult.pk == entry['pk'])
                obsolete.delete(synchronize_session=False)
                continue

            if r_type != 'Sequence':
                continue

            fields = {
                'partial': hit.partial,
                'probable_indel_or_misalign': hit.has_possible_indel,
                'v_gene': format_ties(hit.v_gene),
                'j_gene': format_ties(hit.j_gene),
                'num_gaps': hit.num_gaps,
                'seq_start': hit.seq_start,
                'v_match': hit.v_match,
                'v_length': hit.v_length,
                'j_match': hit.j_match,
                'j_length': hit.j_length,
                'removed_prefix': hit.sequence.removed_prefix_sequence,
                'removed_prefix_qual': hit.sequence.removed_prefix_quality,
                'v_mutation_fraction': hit.v_mutation_fraction,
                'pre_cdr3_length': hit.pre_cdr3_length,
                'pre_cdr3_match': hit.pre_cdr3_match,
                'post_cdr3_length': hit.post_cdr3_length,
                'post_cdr3_match': hit.post_cdr3_match,
                'in_frame': hit.in_frame,
                'functional': hit.functional,
                'stop': hit.stop,
                'cdr3_nt': hit.cdr3,
                'cdr3_num_nts': len(hit.cdr3),
                'cdr3_aa': lookups.aas_from_nts(hit.cdr3),
                'sequence': str(hit.sequence.sequence),
                'quality': hit.sequence.quality,
                'locally_aligned': hit.locally_aligned,
                '_insertions': serialize_gaps(hit.insertions),
                '_deletions': serialize_gaps(hit.deletions),
                'germline': hit.germline
            }
            # Building a throwaway model instance validates the field
            # values; the instance itself is never added to the session.
            Sequence(**fields)

            existing = session.query(Sequence).filter(
                Sequence.ai == entry['pk'])
            existing.update(fields, synchronize_session=False)
        except ValueError:
            continue