def _corrected_sequence_fields(alignment):
    """Map an alignment onto the column values written to a ``Sequence`` row.

    Every value is read from ``alignment`` (or ``alignment.sequence``); ties
    are formatted with ``format_ties`` and gap lists with ``serialize_gaps``.
    """
    return {
        'partial': alignment.partial,
        'probable_indel_or_misalign': alignment.has_possible_indel,
        'v_gene': format_ties(alignment.v_gene),
        'j_gene': format_ties(alignment.j_gene),
        'num_gaps': alignment.num_gaps,
        'seq_start': alignment.seq_start,
        'v_match': alignment.v_match,
        'v_length': alignment.v_length,
        'j_match': alignment.j_match,
        'j_length': alignment.j_length,
        'removed_prefix': alignment.sequence.removed_prefix_sequence,
        'removed_prefix_qual': alignment.sequence.removed_prefix_quality,
        'v_mutation_fraction': alignment.v_mutation_fraction,
        'pre_cdr3_length': alignment.pre_cdr3_length,
        'pre_cdr3_match': alignment.pre_cdr3_match,
        'post_cdr3_length': alignment.post_cdr3_length,
        'post_cdr3_match': alignment.post_cdr3_match,
        'in_frame': alignment.in_frame,
        'functional': alignment.functional,
        'stop': alignment.stop,
        'cdr3_nt': alignment.cdr3,
        'cdr3_num_nts': len(alignment.cdr3),
        'cdr3_aa': lookups.aas_from_nts(alignment.cdr3),
        'sequence': str(alignment.sequence.sequence),
        'quality': alignment.sequence.quality,
        'locally_aligned': alignment.locally_aligned,
        '_insertions': serialize_gaps(alignment.insertions),
        '_deletions': serialize_gaps(alignment.deletions),
        'germline': alignment.germline,
    }


def add_sequences_from_sample(session, sample, sequences, props):
    """Write corrected alignments for ``sample`` back to the database.

    Each item of ``sequences`` is a mapping with the keys ``'alignment'``,
    ``'r_type'`` and ``'pk'``.  Depending on ``'r_type'``:

    * ``'NoResult'``: the alignment is inserted through ``add_sequences`` and
      the ``NoResult`` row whose ``pk`` matches is deleted.
    * ``'Sequence'``: the ``Sequence`` row whose ``ai`` matches ``'pk'`` is
      updated in place with the alignment's values.

    Items whose alignment fails ``props.validate`` with an
    ``AlignmentException`` are skipped, as are items for which a
    ``ValueError`` is raised anywhere during processing.

    :param session: database session used for queries and updates
    :param sample: sample the sequences belong to (``sample.id`` is logged)
    :param sequences: sized collection of the mappings described above
    :param props: object providing ``validate(alignment)``
    """
    logger.info('Adding {} corrected sequences to sample {}'.format(
        len(sequences), sample.id))

    # periodic_commit is defined elsewhere; presumably it yields the items
    # while committing the session at intervals -- confirm against its source.
    for record in periodic_commit(session, sequences):
        alignment = record['alignment']
        try:
            try:
                props.validate(alignment)
            except AlignmentException:
                continue

            kind = record['r_type']
            if kind == 'NoResult':
                add_sequences(session, [alignment], sample,
                              error_action='raise')
                stale = session.query(NoResult).filter(
                    NoResult.pk == record['pk'])
                stale.delete(synchronize_session=False)
            elif kind == 'Sequence':
                fields = _corrected_sequence_fields(alignment)
                # Constructing the model only validates the values; the
                # instance is discarded and nothing is added to the session.
                Sequence(**fields)
                target = session.query(Sequence).filter(
                    Sequence.ai == record['pk'])
                target.update(fields, synchronize_session=False)
        except ValueError:
            continue
def add_results(uniques, sample, session):
    """Insert every unique alignment and record per-sample V averages.

    ``uniques`` is an iterable of iterables; all inner items are processed in
    order.  Each one is inserted with ``add_sequences``; if an
    ``AlignmentException`` is raised, a no-result entry is recorded for
    ``unique.sequence`` instead.  When at least one insert succeeded,
    ``sample.v_ties_len`` and ``sample.v_ties_mutations`` are set to the mean
    ``v_length`` and mean ``v_mutation_fraction`` of the inserted items.
    The session is committed before returning.

    :param uniques: iterable of iterables of alignments
    :param sample: sample receiving the sequences and the averages
    :param session: database session; committed once at the end
    """
    v_lengths = []
    v_mutations = []

    for unique in itertools.chain(*uniques):
        try:
            add_sequences(session, [unique], sample)
            v_lengths.append(unique.v_length)
            v_mutations.append(unique.v_mutation_fraction)
        except AlignmentException as e:
            add_noresults_for_vdj(session, unique.sequence, sample, str(e))

    if v_lengths:
        sample.v_ties_len = sum(v_lengths) / len(v_lengths)
        sample.v_ties_mutations = sum(v_mutations) / len(v_mutations)

    session.commit()
def read_file(session, fmt, handle, sample, v_germlines, j_germlines, props):
    """Import tab-delimited alignments from ``handle`` into ``sample``.

    Rows are read with ``csv.DictReader``.  When ``fmt`` is ``'adaptive'``
    each row is first converted with ``extract_adaptive_sequence``.  Rows are
    then aligned with ``create_alignment`` and collapsed: alignments whose
    sequences have the same length and compare equal under
    ``dnautils.equal`` are merged by summing their copy numbers.  The
    resulting unique alignments are validated with ``props.validate`` and
    inserted in order of increasing sequence length.  Any
    ``AlignmentException`` along the way is recorded through
    ``add_noresults_for_vdj``.  Finally the sample's mean V length and mean V
    mutation fraction are stored (if anything was inserted) and the session
    is committed.

    :param session: database session; committed once at the end
    :param fmt: input format name; only ``'adaptive'`` triggers conversion
    :param handle: open text file/iterable of tab-delimited lines with header
    :param sample: sample receiving the sequences and the averages
    :param v_germlines: V germlines passed through to the alignment helpers
    :param j_germlines: J germlines passed through to the alignment helpers
    :param props: object providing ``validate(alignment)``
    """
    reader = csv.DictReader(handle, delimiter='\t')
    uniques = {}

    for i, line in enumerate(reader):
        if fmt == 'adaptive':
            try:
                line = extract_adaptive_sequence(i, line, v_germlines,
                                                 j_germlines)
            except (AlignmentException, KeyError) as e:
                # The row could not be converted, so no real ID is
                # available; record the failure under a positional ID.
                seq = VDJSequence('seq_{}'.format(i), '')
                add_noresults_for_vdj(session, seq, sample, str(e))
                continue

        seq = VDJSequence(line['SEQUENCE_ID'],
                          line['SEQUENCE_IMGT'].replace('.', '-'))
        if 'DUPCOUNT' in line:
            seq.copy_number = int(line['DUPCOUNT'])

        try:
            alignment = create_alignment(seq, line, v_germlines, j_germlines)
            # Only sequences of equal length are compared with each other.
            for other in uniques.setdefault(
                    len(alignment.sequence.sequence), []):
                if dnautils.equal(other.sequence.sequence,
                                  alignment.sequence.sequence):
                    other.sequence.copy_number += (
                        alignment.sequence.copy_number)
                    break
            else:
                uniques[len(alignment.sequence.sequence)].append(alignment)
        except AlignmentException as e:
            add_noresults_for_vdj(session, seq, sample, str(e))

    uniques = [s for k in sorted(uniques) for s in uniques[k]]

    lens = []
    muts = []
    for unique in uniques:
        try:
            props.validate(unique)
            add_sequences(session, [unique], sample)
            lens.append(unique.v_length)
            muts.append(unique.v_mutation_fraction)
        except AlignmentException as e:
            # Report the failure against this unique's own sequence.  The
            # loop variable ``seq`` from the parsing loop above still refers
            # to the last row read and must not be used here.
            add_noresults_for_vdj(session, unique.sequence, sample, str(e))

    if lens:
        # float() keeps the mean fractional even under integer division.
        sample.v_ties_len = sum(lens) / float(len(lens))
        sample.v_ties_mutations = sum(muts) / float(len(muts))

    session.commit()
def read_file(session, fmt, handle, sample, v_germlines, j_germlines, props):
    """Import tab-delimited alignments from ``handle`` into ``sample``.

    Rows are read with ``csv.DictReader``.  When ``fmt`` is ``'adaptive'``
    each row is first converted with ``extract_adaptive_sequence``.  Rows are
    then aligned with ``create_alignment`` and collapsed: alignments whose
    sequences have the same length and compare equal under
    ``dnautils.equal`` are merged by summing their copy numbers.  The
    resulting unique alignments are validated with ``props.validate`` and
    inserted in order of increasing sequence length.  Any
    ``AlignmentException`` along the way is recorded through
    ``add_noresults_for_vdj``.  Finally the sample's mean V length and mean V
    mutation fraction are stored (if anything was inserted) and the session
    is committed.

    :param session: database session; committed once at the end
    :param fmt: input format name; only ``'adaptive'`` triggers conversion
    :param handle: open text file/iterable of tab-delimited lines with header
    :param sample: sample receiving the sequences and the averages
    :param v_germlines: V germlines passed through to the alignment helpers
    :param j_germlines: J germlines passed through to the alignment helpers
    :param props: object providing ``validate(alignment)``
    """
    reader = csv.DictReader(handle, delimiter='\t')
    uniques = {}

    for i, line in enumerate(reader):
        if fmt == 'adaptive':
            try:
                line = extract_adaptive_sequence(i, line, v_germlines,
                                                 j_germlines)
            except (AlignmentException, KeyError) as e:
                # The row could not be converted, so no real ID is
                # available; record the failure under a positional ID.
                seq = VDJSequence('seq_{}'.format(i), '')
                add_noresults_for_vdj(session, seq, sample, str(e))
                continue

        seq = VDJSequence(line['SEQUENCE_ID'],
                          line['SEQUENCE_IMGT'].replace('.', '-'))
        if 'DUPCOUNT' in line:
            seq.copy_number = int(line['DUPCOUNT'])

        try:
            alignment = create_alignment(seq, line, v_germlines, j_germlines)
            # Only sequences of equal length are compared with each other.
            for other in uniques.setdefault(
                    len(alignment.sequence.sequence), []):
                if dnautils.equal(other.sequence.sequence,
                                  alignment.sequence.sequence):
                    other.sequence.copy_number += (
                        alignment.sequence.copy_number)
                    break
            else:
                uniques[len(alignment.sequence.sequence)].append(alignment)
        except AlignmentException as e:
            add_noresults_for_vdj(session, seq, sample, str(e))

    uniques = [s for k in sorted(uniques) for s in uniques[k]]

    lens = []
    muts = []
    for unique in uniques:
        try:
            props.validate(unique)
            add_sequences(session, [unique], sample)
            lens.append(unique.v_length)
            muts.append(unique.v_mutation_fraction)
        except AlignmentException as e:
            # Report the failure against this unique's own sequence.  The
            # loop variable ``seq`` from the parsing loop above still refers
            # to the last row read and must not be used here.
            add_noresults_for_vdj(session, unique.sequence, sample, str(e))

    if lens:
        # float() keeps the mean fractional even under integer division.
        sample.v_ties_len = sum(lens) / float(len(lens))
        sample.v_ties_mutations = sum(muts) / float(len(muts))

    session.commit()
def aggregate_collapse(aggregate_queue, db_config, sample_id, props):
    """Drain ``aggregate_queue`` and insert its sequences for one sample.

    A new database session is opened from ``db_config`` and the sample with
    id ``sample_id`` is loaded.  Every item of ``aggregate_queue`` is itself
    iterated, and the sequences it yields are buffered and inserted with
    ``add_sequences`` in batches of 1000, committing after each full batch.
    Any remainder is inserted at the end, followed by a final commit.  The
    session is always closed, even if a query, insert or commit raises.

    :param aggregate_queue: iterable whose items are iterables of sequences
    :param db_config: configuration handed to ``config.init_db``
    :param sample_id: primary key of the ``Sample`` to attach sequences to
    :param props: object whose ``genotyping`` flag controls allele
        stripping (``strip_alleles`` is its negation)
    """
    session = config.init_db(db_config, create=False)
    try:
        sample = session.query(Sample).filter(Sample.id == sample_id).one()

        seqs_to_add = []
        for alignment in aggregate_queue:
            for seq in alignment:
                seqs_to_add.append(seq)
                if len(seqs_to_add) >= 1000:
                    add_sequences(session, seqs_to_add, sample,
                                  strip_alleles=not props.genotyping)
                    # Start a fresh list rather than clearing in place, in
                    # case add_sequences keeps a reference to the batch.
                    seqs_to_add = []
                    session.commit()

        if seqs_to_add:
            add_sequences(session, seqs_to_add, sample,
                          strip_alleles=not props.genotyping)

        logger.info('Finished aggregating sequences')
        session.commit()
    finally:
        # Release the connection on both the success and the error path.
        session.close()
def add_sequences_from_sample(session, sample, sequences, props):
    """Apply corrected alignments for ``sample`` to the database.

    ``sequences`` holds mappings with the keys ``'alignment'``, ``'r_type'``
    and ``'pk'``.  An item with ``r_type == 'NoResult'`` is inserted via
    ``add_sequences`` and its ``NoResult`` row (matched on ``pk``) is
    deleted; an item with ``r_type == 'Sequence'`` has its existing
    ``Sequence`` row (matched on ``ai``) updated with the alignment's values.
    Items with any other ``r_type`` are ignored.

    An item is skipped when ``props.validate`` raises ``AlignmentException``
    for its alignment, or when a ``ValueError`` is raised at any point while
    handling it.

    :param session: database session used for queries and updates
    :param sample: sample the sequences belong to (``sample.id`` is logged)
    :param sequences: sized collection of the mappings described above
    :param props: object providing ``validate(alignment)``
    """
    logger.info('Adding {} corrected sequences to sample {}'.format(
        len(sequences), sample.id))

    # periodic_commit is defined elsewhere; presumably it yields the items
    # while committing the session at intervals -- confirm against its source.
    for entry in periodic_commit(session, sequences):
        alignment = entry['alignment']
        try:
            try:
                props.validate(alignment)
            except AlignmentException:
                continue

            result_type = entry['r_type']

            if result_type == 'NoResult':
                add_sequences(session, [alignment], sample,
                              error_action='raise')
                no_result_rows = session.query(NoResult).filter(
                    NoResult.pk == entry['pk'])
                no_result_rows.delete(synchronize_session=False)
                continue

            if result_type != 'Sequence':
                continue

            updated = {
                'partial': alignment.partial,
                'probable_indel_or_misalign': alignment.has_possible_indel,
                'v_gene': format_ties(alignment.v_gene),
                'j_gene': format_ties(alignment.j_gene),
                'num_gaps': alignment.num_gaps,
                'seq_start': alignment.seq_start,
                'v_match': alignment.v_match,
                'v_length': alignment.v_length,
                'j_match': alignment.j_match,
                'j_length': alignment.j_length,
                'removed_prefix': alignment.sequence.removed_prefix_sequence,
                'removed_prefix_qual':
                    alignment.sequence.removed_prefix_quality,
                'v_mutation_fraction': alignment.v_mutation_fraction,
                'pre_cdr3_length': alignment.pre_cdr3_length,
                'pre_cdr3_match': alignment.pre_cdr3_match,
                'post_cdr3_length': alignment.post_cdr3_length,
                'post_cdr3_match': alignment.post_cdr3_match,
                'in_frame': alignment.in_frame,
                'functional': alignment.functional,
                'stop': alignment.stop,
                'cdr3_nt': alignment.cdr3,
                'cdr3_num_nts': len(alignment.cdr3),
                'cdr3_aa': lookups.aas_from_nts(alignment.cdr3),
                'sequence': str(alignment.sequence.sequence),
                'quality': alignment.sequence.quality,
                'locally_aligned': alignment.locally_aligned,
                '_insertions': serialize_gaps(alignment.insertions),
                '_deletions': serialize_gaps(alignment.deletions),
                'germline': alignment.germline,
            }

            # Building a throwaway model instance only validates the values;
            # it is never added to the session.
            Sequence(**updated)

            existing_rows = session.query(Sequence).filter(
                Sequence.ai == entry['pk'])
            existing_rows.update(updated, synchronize_session=False)
        except ValueError:
            continue