예제 #1
0
    def __init__(self, glfo, args):
        """Set up the empty counters and the column bookkeeping lists.

        glfo: germline info; stored on the instance and passed to MuteFreqer.
        args: options object; stored on the instance, and its
            <region_end_exclusions> attribute is forwarded to MuteFreqer.
        """
        self.glfo = glfo
        self.args = args
        self.mfreqer = MuteFreqer(self.glfo, exclusions=args.region_end_exclusions)

        # running totals
        self.reco_total = 0  # total number of recombination events
        self.mute_total = 0  # total number of sequences

        # one (initially empty) counter per column, plus the 'all' counter
        self.counts = {'all': {}}
        self.counts.update({column: {} for column in utils.column_dependencies})

        # columns whose values are strings: gene names, plus the content columns added below
        self.string_columns = {region + '_gene' for region in utils.regions}

        # base content of each insertion (a fresh zeroed dict for every boundary)
        for bound in utils.boundaries:
            content_key = bound + '_insertion_content'
            self.counts[content_key] = dict.fromkeys(utils.nukes, 0)
            self.string_columns.add(content_key)

        self.counts['cdr3_length'] = {}

        # NOTE 'seq_content' is the *nucleotide* content. It'd be clearer with nucleotide in the name
        # (now that there's also an amino acid version), but the name corresponds to lots of existing
        # file paths, so it stays as it is.
        self.counts['seq_content'] = dict.fromkeys(utils.nukes, 0)

        # init_aa_stuff() has to come before <self.all_aa> is read (presumably it's what sets it -- it's defined elsewhere)
        self.init_aa_stuff()
        self.counts['seq_aa_content'] = dict.fromkeys(self.all_aa, 0)
        self.string_columns.update(('seq_content', 'seq_aa_content'))

        # Columns that don't get written to the parameter dir, since:
        #   a) cdr3_length is better viewed as an output of more fundamental parameters
        #      (gene choice, insertion + deletion lengths), and
        #   b) they were added long after the others, and adding a new file to the established
        #      parameter directory structure isn't desirable.
        # (They're only counted so that they can be plotted.)
        self.no_write_columns = ['cdr3_length', 'seq_aa_content']

        deletion_columns = [erosion + '_del' for erosion in utils.all_erosions]
        insertion_columns = [bound + '_insertion' for bound in utils.boundaries]
        self.columns_to_subset_by_gene = deletion_columns + insertion_columns
예제 #2
0
 def __init__(self, germline_seqs):
     """Zero the totals, create the empty counters, then build the MuteFreqer.

     germline_seqs: passed straight through to MuteFreqer (not stored here).
     """
     # running totals
     self.reco_total = 0  # total number of recombination events
     self.mute_total = 0  # total number of sequences

     # one (initially empty) counter per column, plus the 'all' counter
     self.counts = {'all': {}}
     self.counts.update({column: {} for column in utils.column_dependencies})

     # base content of each insertion (a fresh zeroed dict for every boundary)
     for bound in utils.boundaries:
         content_key = bound + '_insertion_content'
         self.counts[content_key] = dict.fromkeys(utils.nukes, 0)

     # base content of the sequences
     self.counts['seq_content'] = dict.fromkeys(utils.nukes, 0)

     # the mute freqer is constructed last, from the germline sequences only
     self.mutefreqer = MuteFreqer(germline_seqs)
예제 #3
0
    def __init__(self, glfo, args):
        """Set up the empty counters and the column bookkeeping lists.

        glfo: germline info; stored on the instance and passed to MuteFreqer.
        args: options object; only stored on the instance here.
        """
        self.glfo = glfo
        self.args = args
        self.mfreqer = MuteFreqer(self.glfo)

        # running totals
        self.reco_total = 0  # total number of recombination events
        self.mute_total = 0  # total number of sequences

        # one (initially empty) counter per column, plus the 'all' counter
        self.counts = {'all': {}}
        self.counts.update({column: {} for column in utils.column_dependencies})

        # columns whose values are strings: gene names, plus the content columns added below
        self.string_columns = {region + '_gene' for region in utils.regions}

        # base content of each insertion (a fresh zeroed dict for every boundary)
        for bound in utils.boundaries:
            content_key = bound + '_insertion_content'
            self.counts[content_key] = dict.fromkeys(utils.nukes, 0)
            self.string_columns.add(content_key)

        # base content of the sequences
        self.counts['seq_content'] = dict.fromkeys(utils.nukes, 0)
        self.string_columns.add('seq_content')

        deletion_columns = [erosion + '_del' for erosion in utils.all_erosions]
        insertion_columns = [bound + '_insertion' for bound in utils.boundaries]
        self.columns_to_subset_by_gene = deletion_columns + insertion_columns
 def __init__(self, germline_seqs):
     """Zero the total, create the empty counters, then build the MuteFreqer.

     germline_seqs: passed straight through to MuteFreqer (not stored here).
     """
     bases = ('A', 'C', 'G', 'T')  # the bases that get their own content counter

     self.total = 0

     # one (initially empty) counter per column, plus the 'all' counter
     self.counts = {'all': {}}
     self.counts.update({column: {} for column in utils.column_dependencies})

     # base content of each insertion (a fresh zeroed dict for every boundary)
     for bound in utils.boundaries:
         self.counts[bound + '_insertion_content'] = {base: 0 for base in bases}

     # base content of the sequences
     self.counts['seq_content'] = {base: 0 for base in bases}

     # the mute freqer is constructed last, from the germline sequences only
     self.mutefreqer = MuteFreqer(germline_seqs)