def __init__(self, glfo, args):
    """Create the (initially empty) counters for every column that gets tallied.

    glfo -- stored on the instance and handed to the MuteFreqer
    args -- stored on the instance; <region_end_exclusions> is read from it here
    """
    self.glfo = glfo
    self.args = args
    self.mfreqer = MuteFreqer(self.glfo, exclusions=args.region_end_exclusions)

    self.reco_total = 0  # total number of recombination events
    self.mute_total = 0  # total number of sequences

    # 'all' goes in first, followed by one empty counter per column in utils.column_dependencies
    self.counts = {'all': {}}
    self.counts.update({column: {} for column in utils.column_dependencies})

    # the gene columns start off <string_columns>; the content columns get added to it below
    self.string_columns = {region + '_gene' for region in utils.regions}
    for bound in utils.boundaries:
        content_column = bound + '_insertion_content'
        self.counts[content_column] = {nuke: 0 for nuke in utils.nukes}  # base content of each insertion
        self.string_columns.add(content_column)

    self.counts['cdr3_length'] = {}

    # NOTE 'seq_content' is the nucleotide content. It'd be nicer if it had nucleotide in the name now that
    # there's also an aa version, but the name corresponds to a great many existing file paths so it stays.
    self.counts['seq_content'] = {nuke: 0 for nuke in utils.nukes}
    self.init_aa_stuff()  # presumably what sets self.all_aa, which is read on the next line
    self.counts['seq_aa_content'] = {aa: 0 for aa in self.all_aa}
    self.string_columns.update(('seq_content', 'seq_aa_content'))

    # Columns that don't get written to the parameter dir, since a) cdr3_length is better viewed as an output
    # of more fundamental parameters (gene choice, insertion + deletion lengths), and b) they were added long
    # after the others, and a new file shouldn't be added to the established parameter directory structure
    # (they're only here so that they get plotted).
    self.no_write_columns = ['cdr3_length', 'seq_aa_content']

    erosion_columns = [erosion + '_del' for erosion in utils.all_erosions]
    insertion_columns = [bound + '_insertion' for bound in utils.boundaries]
    self.columns_to_subset_by_gene = erosion_columns + insertion_columns
def __init__(self, germline_seqs):
    """Zero the totals, set up an empty counter for each column, then build the MuteFreqer.

    germline_seqs -- passed straight through to the MuteFreqer constructor
    """
    self.reco_total = 0  # total number of recombination events
    self.mute_total = 0  # total number of sequences

    # 'all' goes in first, followed by one empty counter per column in utils.column_dependencies
    self.counts = {'all': {}}
    self.counts.update({column: {} for column in utils.column_dependencies})

    # base content of each insertion: every boundary gets its own per-nucleotide tally, starting from zero
    self.counts.update({
        bound + '_insertion_content': {nuke: 0 for nuke in utils.nukes}
        for bound in utils.boundaries
    })
    self.counts['seq_content'] = {nuke: 0 for nuke in utils.nukes}

    self.mutefreqer = MuteFreqer(germline_seqs)
def __init__(self, glfo, args):
    """Create the (initially empty) counters for every column that gets tallied.

    glfo -- stored on the instance and handed to the MuteFreqer
    args -- stored on the instance (not otherwise used in here)
    """
    self.glfo = glfo
    self.args = args
    self.mfreqer = MuteFreqer(self.glfo)

    self.reco_total = 0  # total number of recombination events
    self.mute_total = 0  # total number of sequences

    # 'all' goes in first, followed by one empty counter per column in utils.column_dependencies
    self.counts = {'all': {}}
    self.counts.update({column: {} for column in utils.column_dependencies})

    # the gene columns start off <string_columns>; the content columns get added to it below
    self.string_columns = {region + '_gene' for region in utils.regions}
    for bound in utils.boundaries:
        content_column = bound + '_insertion_content'
        self.counts[content_column] = {nuke: 0 for nuke in utils.nukes}  # base content of each insertion
        self.string_columns.add(content_column)

    self.counts['seq_content'] = {nuke: 0 for nuke in utils.nukes}
    self.string_columns.add('seq_content')

    erosion_columns = [erosion + '_del' for erosion in utils.all_erosions]
    insertion_columns = [bound + '_insertion' for bound in utils.boundaries]
    self.columns_to_subset_by_gene = erosion_columns + insertion_columns
def __init__(self, germline_seqs):
    """Zero the total, set up an empty counter for each column, then build the MuteFreqer.

    germline_seqs -- passed straight through to the MuteFreqer constructor
    """
    def zeroed_bases():
        # a fresh per-base tally each time, so that no two columns share a dict
        return {base: 0 for base in ('A', 'C', 'G', 'T')}

    self.total = 0

    # 'all' goes in first, followed by one empty counter per column in utils.column_dependencies
    self.counts = {'all': {}}
    self.counts.update({column: {} for column in utils.column_dependencies})

    for bound in utils.boundaries:
        self.counts[bound + '_insertion_content'] = zeroed_bases()  # base content of each insertion
    self.counts['seq_content'] = zeroed_bases()

    self.mutefreqer = MuteFreqer(germline_seqs)