Example #1
0
    def get_shared_amino_acids_counts(self, **kwargs):
        '''
        Return one page of the amino acids shared within this comparison,
        together with their per-clonofilter counts.

        Keyword arguments:
            per_page: number of amino acids per page. Falls back to 10 when
                missing, not convertible to an integer, or smaller than 1.
            page: page number to report. Falls back to the first page when
                missing or not convertible to an integer, and to the last
                page when the paginator raises EmptyPage for it.

        Returns a 3-tuple (counts, paginator.num_pages, paginator.count)
        where counts is a nested dict in the format:
            {aa_id: {'sequence': aa.sequence,
                     'clonofilters': {cf_id1: float,
                                      cf_id2: float}}}
        Each float is the sum of clonotype.count / clonofilter.size() over
        the clonotypes of that amino acid whose sample belongs to the
        comparison.
        '''
        from collections import defaultdict

        clonofilters = self.clonofilters_all()
        samples = self.get_samples()

        # Index the clonofilters by id and group their ids by sample id
        sampleid2cfid = defaultdict(list)
        for cf in clonofilters:
            sampleid2cfid[cf.sample.id].append(cf.id)
        cfid2cf = dict((cf.id, cf) for cf in clonofilters)
        # size() is looked up once per clonofilter instead of once per
        # clonotype; assumes size() is stable for the duration of this call
        cfid2size = {}

        # Validate the page size up front so that only bad input (and not
        # an unrelated error) triggers the default
        try:
            per_page = int(kwargs['per_page'])
        except (KeyError, TypeError, ValueError):
            per_page = 10
        if per_page < 1:
            per_page = 10
        paginator = Paginator(self.get_shared_amino_acids(), per_page)

        # Same for the page number: missing or non-numeric means page 1
        try:
            page_number = int(kwargs['page'])
        except (KeyError, TypeError, ValueError):
            page_number = 1
        try:
            shared_amino_acids = paginator.page(page_number)
        except EmptyPage:
            shared_amino_acids = paginator.page(paginator.num_pages)

        returnable = {}
        for amino_acid in shared_amino_acids:
            clonofilter_dict = defaultdict(float)
            for recombination in amino_acid.recombination_set.all():
                for clonotype in recombination.clonotype_set.all():
                    sample = clonotype.sample
                    if sample not in samples:
                        continue
                    # .get() avoids inserting empty lists for samples that
                    # have no clonofilter
                    for cfid in sampleid2cfid.get(sample.id, ()):
                        if cfid not in cfid2size:
                            cfid2size[cfid] = cfid2cf[cfid].size()
                        clonofilter_dict[cfid] += (
                            1.0 * clonotype.count / cfid2size[cfid])
            returnable[amino_acid.id] = {
                'clonofilters': undefaulted(clonofilter_dict),
                'sequence': amino_acid.sequence,
            }

        return returnable, paginator.num_pages, paginator.count
Example #2
0
    def hits_by_article(self):
        '''
        Group the hits returned by self.hits() by their article.

        Builds a mapping of hit.article -> list of hits (in the order
        self.hits() yields them) and returns it after passing it through
        undefaulted().
        '''
        from utils.utils import undefaulted
        from collections import defaultdict

        grouped = defaultdict(list)
        for blat_hit in self.hits():
            grouped[blat_hit.article].append(blat_hit)
        return undefaulted(grouped)
Example #3
0
    def get_shared_recombinations_counts(self):
        '''
        Given a comparison, return the read counts of the recombinations
        shared by all of its samples.

        Returns a nested dict in the format:
            {recombination.nucleotide: {sample: <sum of clonotype.count>}}
        The result is empty unless the comparison has more than one sample.
        '''
        from collections import defaultdict
        from functools import reduce

        returnable = defaultdict(lambda: defaultdict(float))
        samples = self.get_samples()

        if len(samples) > 1:
            # Narrow the recombinations once per sample so that only those
            # with a clonotype in every sample remain. The chained filters
            # can yield the same recombination several times, so the
            # result must be made distinct before counting, otherwise the
            # counts below would be added more than once.
            shared_recombinations = reduce(
                lambda q, s: q.filter(clonotype__sample=s),
                samples,
                self.get_recombinations()).distinct()

            for recombination in shared_recombinations:
                for clonotype in recombination.clonotype_set.all():
                    # Skip clonotypes from samples outside this comparison
                    if clonotype.sample in samples:
                        returnable[recombination.nucleotide][
                            clonotype.sample] += clonotype.count

        return undefaulted(returnable)
Example #4
0
    def get_shared_amino_acids_clonotypes(self):
        '''
        Given a comparison, map each shared amino acid to the clonotypes
        that carry it in the comparison's samples.

        Returns a nested dict in the format:
            {amino_acid: {sample: clonotype}}
        When several clonotypes of the same sample belong to one amino
        acid, only the last one iterated is kept.
        '''
        from collections import defaultdict

        samples = self.get_samples()
        # The inner mapping stores clonotype objects, so a plain dict is
        # the right default (nothing is ever accumulated numerically)
        returnable = defaultdict(dict)

        shared_amino_acids = self.get_shared_amino_acids()
        # A falsy result (empty, or possibly None) means nothing to report
        if not shared_amino_acids:
            return undefaulted(returnable)

        for amino_acid in shared_amino_acids:
            for recombination in amino_acid.recombination_set.all():
                for clonotype in recombination.clonotype_set.all():
                    # Skip clonotypes from samples outside this comparison
                    if clonotype.sample in samples:
                        returnable[amino_acid][clonotype.sample] = clonotype
        return undefaulted(returnable)