def get_shared_amino_acids_counts(self, **kwargs):
    '''
    Return one page of the amino acids shared within this comparison,
    together with per-clonofilter normalized counts.

    Keyword arguments:
        per_page: number of amino acids per page. Defaults to 10; 10 is
            also used when the paginator rejects the supplied value.
        page: page number to return. Defaults to 1; 1 is also used when
            the value is not an integer. A page number the paginator
            reports as an EmptyPage yields the last page instead.

    Returns a 3-tuple (counts, num_pages, total), where num_pages and
    total are the paginator's ``num_pages`` and ``count`` and counts is a
    nested dict in the format:
        {aa_id: {'sequence': aa.sequence,
                 'clonofilters': {cf_id1: float, cf_id2: float}}}
    Each float is the sum of ``clonotype.count / clonofilter.size()`` over
    the amino acid's clonotypes whose sample belongs to the comparison.
    '''
    from collections import defaultdict
    from django.core.paginator import PageNotAnInteger

    clonofilters = self.clonofilters_all()
    samples = self.get_samples()

    # sample id -> ids of the clonofilters built on that sample, and
    # clonofilter id -> clonofilter, filled in a single pass.
    sampleid2cfid = defaultdict(list)
    cfid2cf = {}
    for cf in clonofilters:
        sampleid2cfid[cf.sample.id].append(cf.id)
        cfid2cf[cf.id] = cf

    # Fetch the shared amino acids once; errors raised while fetching them
    # must propagate instead of being mistaken for a bad 'per_page'.
    shared = self.get_shared_amino_acids()
    try:
        paginator = Paginator(shared, kwargs.get('per_page', 10))
    except (TypeError, ValueError):
        # NOTE(review): assumes the paginator coerces per_page with int()
        # and signals a bad value this way -- confirm for the Django
        # version in use.
        paginator = Paginator(shared, 10)

    try:
        shared_amino_acids = paginator.page(kwargs.get('page', 1))
    except PageNotAnInteger:
        shared_amino_acids = paginator.page(1)
    except EmptyPage:
        shared_amino_acids = paginator.page(paginator.num_pages)

    # size() does not depend on the clonotype being visited, so compute it
    # at most once per clonofilter instead of once per clonotype.
    cf_sizes = {}
    returnable = {}
    for amino_acid in shared_amino_acids:
        clonofilter_dict = defaultdict(int)
        for recombination in amino_acid.recombination_set.all():
            for clonotype in recombination.clonotype_set.all():
                sample = clonotype.sample
                if sample not in samples:
                    continue
                for cfid in sampleid2cfid[sample.id]:
                    if cfid not in cf_sizes:
                        cf_sizes[cfid] = cfid2cf[cfid].size()
                    clonofilter_dict[cfid] += (
                        1.0 * clonotype.count / cf_sizes[cfid])
        returnable[amino_acid.id] = {
            'clonofilters': undefaulted(clonofilter_dict),
            'sequence': amino_acid.sequence,
        }
    return returnable, paginator.num_pages, paginator.count
def hits_by_article(self):
    '''
    Group the hits returned by ``self.hits()`` by their article.

    Each hit is appended to the list stored under ``hit.article``, in the
    order ``self.hits()`` yields them. The grouping is passed through
    ``undefaulted`` before being returned.
    '''
    from collections import defaultdict
    from utils.utils import undefaulted

    grouped = defaultdict(list)
    for blat_hit in self.hits():
        grouped[blat_hit.article].append(blat_hit)
    return undefaulted(grouped)
def get_shared_recombinations_counts(self):
    '''
    Given a comparison, return the read counts of the recombinations
    shared by all of its samples.

    A recombination is shared when it has a clonotype in every sample
    returned by ``self.get_samples()``. With fewer than two samples
    nothing is collected.

    Returns a nested mapping (passed through ``undefaulted``) in the
    format:
        {recombination.nucleotide: {sample: <sum of clonotype.count>,
                                    sample2: <sum of clonotype.count>}}
    '''
    from collections import defaultdict

    returnable = defaultdict(lambda: defaultdict(lambda: .0))

    # NOTE(review): the result is unused here; the call is kept only in
    # case clonofilters_all() has side effects -- confirm and remove.
    self.clonofilters_all()
    samples = self.get_samples()

    if len(samples) > 1:
        # Chain one filter() per sample so that only recombinations with
        # a clonotype in every sample remain.
        shared_recombinations = self.get_recombinations()
        for sample in samples:
            shared_recombinations = shared_recombinations.filter(
                clonotype__sample=sample)
        # The chained joins can yield the same recombination several
        # times; deduplicate so its counts are not added more than once.
        shared_recombinations = shared_recombinations.distinct()

        for recombination in shared_recombinations:
            for clonotype in recombination.clonotype_set.all():
                if clonotype.sample in samples:
                    returnable[recombination.nucleotide][
                        clonotype.sample] += clonotype.count
    return undefaulted(returnable)
def get_shared_amino_acids_clonotypes(self):
    '''
    Given a comparison, return a clonotype per sample for each amino
    acid returned by ``self.get_shared_amino_acids()``.

    Only clonotypes whose sample is in ``self.get_samples()`` are kept.
    When several clonotypes of one amino acid belong to the same sample,
    the last one visited wins.

    Returns a nested mapping (passed through ``undefaulted``) in the
    format:
        {amino_acid: {sample: clonotype, sample2: clonotype}}
    '''
    from collections import defaultdict

    samples = self.get_samples()
    returnable = defaultdict(lambda: defaultdict(lambda: .0))

    shared_amino_acids = self.get_shared_amino_acids()
    # Nothing shared (or nothing returned at all): hand back the empty
    # mapping without iterating.
    if not shared_amino_acids:
        return undefaulted(returnable)

    for amino_acid in shared_amino_acids:
        for recombination in amino_acid.recombination_set.all():
            for clonotype in recombination.clonotype_set.all():
                if clonotype.sample in samples:
                    returnable[amino_acid][clonotype.sample] = clonotype
    return undefaulted(returnable)