def print_stuff(line):
    """Print a one-row summary of the cluster annotation `line`.

    Besides `line`, this reads the module-level names `sorted_clusters`,
    `cluster` and `n_total`; the caller must set them before calling.

    Columns, in order: position of `cluster` in `sorted_clusters`, colored
    V / D / J gene names, number of unique ids, mean and median of
    `line['n_mutations']`, mean of `line['mut_freqs']`,
    `len(cluster) / n_total`, `line['cdr3_length'] / 3`, the translated
    naive CDR3, and the value of `utils.fay_wu_h(line)`.

    Returns None; the row is written to stdout.
    """
    cluster_index = sorted_clusters.index(cluster)

    # Only the first element of the returned pair (the naive CDR3 nucleotides)
    # is used.  Per the original author's note, the CDR3 runs from the first
    # base of the cysteine codon through the last base of the tryptophan codon.
    naive_cdr3_nt, _ = utils.subset_sequences(line, restrict_to_region='cdr3', iseq=0)
    # TODO: also translate the mature CDR3 of every sequence so clusters can be
    # searched by seed sequence (left unfinished by the original author).

    # Left-justify to 30 characters so the column lines up between rows.
    cdr3_aa = '%-30s' % Seq(naive_cdr3_nt).translate()
    has_ig_id = any('-ig' in uid for uid in line['unique_ids'])
    if has_ig_id:
        # Highlight clusters holding an id with '-ig' in it
        # (presumably the seed sequences -- confirm).
        cdr3_aa = utils.color('red', cdr3_aa, width=30)

    v_name = utils.color_gene(line['v_gene'], width=15)
    d_name = utils.color_gene(line['d_gene'], width=15)
    j_name = utils.color_gene(line['j_gene'], width=10)
    n_ids = len(line['unique_ids'])
    mean_n_muts = numpy.mean(line['n_mutations'])
    median_n_muts = numpy.median(line['n_mutations'])
    mean_mut_freq = numpy.mean(line['mut_freqs'])
    repertoire_fraction = float(len(cluster)) / n_total
    cdr3_aa_length = line['cdr3_length'] / 3
    fay_wu_h = utils.fay_wu_h(line, debug=False)

    row = (
        cluster_index,
        v_name,
        d_name,
        j_name,
        n_ids,
        mean_n_muts,
        median_n_muts,
        mean_mut_freq,
        repertoire_fraction,
        cdr3_aa_length,
        cdr3_aa,
        fay_wu_h,
    )
    print('%4s     %s %s %s %5d %5d %5d %7.3f   %8.4f     %2d   %s %4.2f' % row)
Ejemplo n.º 2
0
 def get_cdr3_title(self, annotation):
     """Return the translated naive CDR3 of `annotation`, for use as a title.

     The CDR3 is taken from `utils.subset_sequences` for the first sequence
     (iseq=0).  When its length is not a multiple of three it is padded with
     'N' up to the next full codon before translation, and the marker
     ' (out of frame)' is appended to the translated result.
     """
     cdr3_nt, _ = utils.subset_sequences(annotation, restrict_to_region='cdr3', iseq=0)

     suffix = ''
     overhang = len(cdr3_nt) % 3
     if overhang:
         # Complete the trailing partial codon so translate() sees whole codons.
         cdr3_nt += 'N' * (3 - overhang)
         suffix = ' (out of frame)'

     return self.Seq.Seq(cdr3_nt).translate() + suffix
Ejemplo n.º 3
0
def print_stuff(line):
    """Print a detailed report for `line` if its naive CDR3 matches `args.cdr3`.

    Besides `line`, this reads the module-level names `sorted_clusters`,
    `cluster`, `n_total`, `args` and `getkey`.

    Nothing is printed unless `args.cdr3` occurs in the formatted CDR3 amino
    acid string.  For a matching cluster the output is: a two-line column
    header, the summary row (cluster index, colored V / D / J genes, number
    of unique ids, mean and median mutation counts, mean mutation frequency,
    `len(cluster) / n_total`, CDR3 length / 3, translated CDR3, Fay-Wu H),
    the naive sequence length, the unique ids, a blank line, and the output
    of `utils.print_reco_event`.

    Returns None.
    """
    cluster_index = sorted_clusters.index(cluster)

    # Per the original author's note: subset_sequences returns the CDR3
    # nucleotides of the naive sequence and of the first mutated sequence
    # (iseq=0); the CDR3 spans the first base of the cysteine codon through
    # the last base of the tryptophan codon.  Only the naive one is used.
    naive_cdr3_nt, _ = utils.subset_sequences(line, restrict_to_region='cdr3', iseq=0)
    # TODO: translate the mature CDR3 of every sequence as well, so clusters
    # can be searched by seed sequence (left unfinished by the original author).

    cdr3_aa = '%-30s' % Seq(naive_cdr3_nt).translate()
    contains_seed = any('-ig' in uid for uid in line['unique_ids'])
    if contains_seed:
        # A cluster containing one of the seed sequences gets a red CDR3.
        cdr3_aa = utils.color('red', cdr3_aa, width=30)

    # Only clusters whose naive CDR3 matches the --cdr3 argument are reported.
    # The test runs against the padded (and possibly colored) display string.
    if args.cdr3 not in cdr3_aa:
        return

    print('index    genes                                        size    n muts    SHM     rep frac     CDR3                                FayWuH')
    print('                                                            mean  med                        len  seq')

    v_name = utils.color_gene(line['v_gene'], width=15)
    d_name = utils.color_gene(line['d_gene'], width=15)
    j_name = utils.color_gene(line['j_gene'], width=10)
    n_ids = len(line['unique_ids'])
    mean_n_muts = numpy.mean(line['n_mutations'])
    median_n_muts = numpy.median(line['n_mutations'])
    mean_mut_freq = numpy.mean(line['mut_freqs'])
    repertoire_fraction = float(len(cluster)) / n_total
    cdr3_aa_length = line['cdr3_length'] / 3
    fay_wu_h = utils.fay_wu_h(line, debug=False)
    row = (
        cluster_index,
        v_name,
        d_name,
        j_name,
        n_ids,
        mean_n_muts,
        median_n_muts,
        mean_mut_freq,
        repertoire_fraction,
        cdr3_aa_length,
        cdr3_aa,
        fay_wu_h,
    )
    print('%4s     %s %s %s %5d %5d %5d %7.3f   %8.4f     %2d   %s %4.2f' % row)

    print('%s length of naive seq' % len(line['naive_seq']))
    print('unique_ids:  %s' % (getkey(line['unique_ids']),))
    print('')
    # NOTE(review): the return value of print_reco_event is printed here; if
    # that function prints the event itself and returns None, this emits an
    # extra "None" line -- confirm against utils before changing.
    print(utils.print_reco_event(line))
Ejemplo n.º 4
0
def naive_cdr3(info):
    """Return the naive CDR3 sequence of the annotation `info`.

    Calls `utils.subset_sequences` for the first sequence (iseq=0), restricted
    to the 'cdr3' region, and returns the first element of the resulting
    pair; the second element is discarded.
    """
    naive_part, _mature_part = utils.subset_sequences(
        info, restrict_to_region='cdr3', iseq=0
    )
    return naive_part
def _rank_score(rank):
    """Return 4 / 3 / 2 / 1 points for a rank of <25 / <=50 / <=75 / <=100, else 0."""
    if rank < 25:
        return 4
    if rank <= 50:
        return 3
    if rank <= 75:
        return 2
    if rank <= 100:
        return 1
    return 0


def _fay_wu_h_score(fay_wu_h):
    """Return 4 / 3 / 2 / 1 points for Fay-Wu H <= -20 / -10 / 0 / 10, else 0."""
    if fay_wu_h <= -20:
        return 4
    if fay_wu_h <= -10:
        return 3
    if fay_wu_h <= 0:
        return 2
    if fay_wu_h <= 10:
        return 1
    return 0


def print_stuff(line):
    """Score the cluster annotation `line` and print a one-row summary.

    Besides `line`, this reads the module-level names `sorted_clusters`,
    `shm_clusters`, `cluster`, `n_total`, `cd4bs_genes`, `glycan_genes`,
    `bridging_genes` and `mper_genes`.

    The clonal-family score is the sum of:
      * 0-4 points for the position of `cluster` in `sorted_clusters`
        (cluster size ranking),
      * 0-4 points for its position in `shm_clusters` (SHM ranking),
      * 0-4 points for the Fay-Wu H value (lower is better),
      * 4 points if the V gene, with the allele suffix after '*' removed,
        appears in any of the four bnAb gene collections.

    The printed row holds the score, the cluster index, colored V / D / J
    genes, number of unique ids, mean and median mutation counts, mean
    mutation frequency, `len(cluster) / n_total`, CDR3 length / 3, the
    translated naive CDR3, and Fay-Wu H.

    Returns None; the row is written to stdout.
    """
    cluster_index = sorted_clusters.index(cluster)
    shm_index = shm_clusters.index(cluster)

    # Only the naive CDR3 nucleotides are used.  Per the original author's
    # note, the CDR3 runs from the first base of the cysteine codon through
    # the last base of the tryptophan codon.
    naive_cdr3_nt, _ = utils.subset_sequences(line, iseq=0, restrict_to_region='cdr3')
    # TODO: also translate the mature CDR3 of every sequence so clusters can be
    # searched by seed sequence (left unfinished by the original author).

    cdr3_aa = '%-30s' % Seq(naive_cdr3_nt).translate()
    if any('-ig' in uid for uid in line['unique_ids']):
        cdr3_aa = utils.color('red', cdr3_aa, width=30)

    # Compute Fay-Wu H once; it is needed for both the score and the row.
    fay_wu_h = utils.fay_wu_h(line, debug=False)

    intscore = _rank_score(cluster_index)   # cluster size
    intscore += _rank_score(shm_index)      # SHM
    intscore += _fay_wu_h_score(fay_wu_h)   # SFS

    # Score by bnAb gene usage.  Each collection is tested separately:
    # `a or b or c or d` evaluates to the first non-empty collection only, so
    # the previous `in (a or b or c or d)` never looked at the others.
    # Beware: this does not consider the CDR3 length of bnAb VH genes.
    v_gene_name = line['v_gene'].split('*')[0]
    bnab_gene_sets = (cd4bs_genes, glycan_genes, bridging_genes, mper_genes)
    if any(v_gene_name in genes for genes in bnab_gene_sets):
        intscore += 4

    print('%4s %4s     %s %s %s %5d %5d %5d %7.3f   %8.4f     %2d   %s %4.2f' % (
        intscore,
        cluster_index,
        utils.color_gene(line['v_gene'], width=15),
        utils.color_gene(line['d_gene'], width=15),
        utils.color_gene(line['j_gene'], width=10),
        len(line['unique_ids']),
        numpy.mean(line['n_mutations']),
        numpy.median(line['n_mutations']),
        numpy.mean(line['mut_freqs']),
        float(len(cluster)) / n_total,
        (line['cdr3_length'] / 3),
        cdr3_aa,
        fay_wu_h,
    ))