def toJSON(self):
    """Serialize the populated filter fields into a plain dict.

    Only attributes whose value is truthy are emitted. When
    ``self.locations`` is truthy, each (start, end) pair in it is rendered
    as a ``"chrom:start-end"`` string under the ``'locations'`` key.

    Raises:
        ValueError: if the two ends of a location decode to different
            chromosomes.
    """
    field_names = (
        'variant_types',
        'so_annotations',
        'ref_freqs',
        'ref_acs',
        'ref_hom_hemi',
        'annotations',
        'genes',
        'exclude_genes',
    )
    result = {}
    for name in field_names:
        if getattr(self, name):
            result[name] = getattr(self, name)

    # Nothing more to add when no location filter is set.
    if not getattr(self, 'locations'):
        return result

    intervals = []
    for start_loc, end_loc in self.locations:
        start_chrom, start_pos = genomeloc.get_chr_pos(start_loc)
        end_chrom, end_pos = genomeloc.get_chr_pos(end_loc)
        if start_chrom != end_chrom:
            raise ValueError(
                "locations have different chromosomes: %s:%s %s:%s"
                % (start_chrom, start_pos, end_chrom, end_pos))
        intervals.append("%s:%s-%s" % (start_chrom, start_pos, end_pos))
    result['locations'] = intervals
    return result
def toJSON(self):
    """Return a dict holding every truthy filter attribute of this object.

    ``self.locations`` (when truthy) is converted to a list of
    ``"chrom:start-end"`` strings stored under ``'locations'``.

    Raises:
        ValueError: if a location pair spans two different chromosomes.
    """
    attr_names = [
        'variant_types',
        'so_annotations',
        'ref_freqs',
        'annotations',
        'genes',
        'exclude_genes',
    ]
    # Same check-then-read order as a plain loop; falsy attributes are skipped.
    payload = {
        attr: getattr(self, attr) for attr in attr_names if getattr(self, attr)
    }

    if getattr(self, 'locations'):
        formatted = []
        for left, right in self.locations:
            left_chrom, left_pos = genomeloc.get_chr_pos(left)
            right_chrom, right_pos = genomeloc.get_chr_pos(right)
            if left_chrom != right_chrom:
                raise ValueError(
                    "locations have different chromosomes: %s:%s %s:%s"
                    % (left_chrom, left_pos, right_chrom, right_pos))
            formatted.append("%s:%s-%s" % (left_chrom, left_pos, right_pos))
        payload['locations'] = formatted

    return payload
def family_coverage_gene(request, family, gene_id):
    """Render the coverage page of one gene for every individual in a family.

    Collects the gene's coding regions, fetches the per-individual coverage
    for the gene from the coverage store, sums the 'callable',
    'low_coverage' and 'poor_mapping' gene totals across individuals and
    expresses each as a ratio of (coding size * number of individuals).

    Args:
        request: the request object handed through to ``render``.
        family: object providing ``project`` and ``get_individuals()``.
        gene_id: identifier passed to the reference lookups.

    Returns:
        Whatever ``render`` returns for 'coverage/family_coverage_gene.html'.
    """
    gene = get_reference().get_gene(gene_id)
    gene_structure = get_reference().get_gene_structure(gene_id)
    individuals = family.get_individuals()
    indiv_ids = [i.indiv_id for i in individuals]
    num_individuals = len(indiv_ids)

    coding_regions = []
    for c in get_coding_regions_from_gene_structure(gene_id, gene_structure):
        coding_regions.append({
            'start': genomeloc.get_chr_pos(c.xstart)[1],
            'stop': genomeloc.get_chr_pos(c.xstop)[1],
            'gene_id': c.gene_id,
            'size': c.xstop - c.xstart + 1,
        })

    coverages = {}
    for individual in individuals:
        coverages[individual.indiv_id] = get_coverage_store().get_coverage_for_gene(
            str(individual.pk), gene['gene_id'])

    statuses = ('callable', 'low_coverage', 'poor_mapping')
    whole_gene = Counter({'callable': 0, 'low_coverage': 0, 'poor_mapping': 0})
    for coverage_spec in coverages.values():
        for status in statuses:
            whole_gene[status] += coverage_spec['gene_totals'][status]

    gene_coding_size = sum(c['stop'] - c['start'] + 1 for c in coding_regions)
    totalsize = gene_coding_size * num_individuals
    for status in statuses:
        # A family without individuals or a gene without coding regions gives
        # totalsize == 0; report a 0.0 ratio instead of raising
        # ZeroDivisionError.
        if totalsize:
            ratio = whole_gene[status] / float(totalsize)
        else:
            ratio = 0.0
        whole_gene['ratio_' + status] = ratio
    whole_gene['gene_coding_size'] = gene_coding_size

    return render(request, 'coverage/family_coverage_gene.html', {
        'project': family.project,
        'family': family,
        'gene': gene,
        'coverages_json': json.dumps(coverages),
        'whole_gene_json': json.dumps(whole_gene),
        'coding_regions_json': json.dumps(coding_regions),
        'indiv_ids_json': json.dumps(indiv_ids),
        'individuals': individuals,
        'whole_gene': whole_gene,
    })
def family_coverage_gene(request, family, gene_id):
    """Render the coverage page of one gene for every individual in a family.

    Collects the gene's coding regions, fetches the per-individual coverage
    for the gene from the coverage store, sums the 'callable',
    'low_coverage' and 'poor_mapping' gene totals across individuals and
    expresses each as a ratio of (coding size * number of individuals).

    Args:
        request: the request object handed through to ``render``.
        family: object providing ``project`` and ``get_individuals()``.
        gene_id: identifier passed to the reference lookups.

    Returns:
        Whatever ``render`` returns for 'coverage/family_coverage_gene.html'.
    """
    gene = get_reference().get_gene(gene_id)
    gene_structure = get_reference().get_gene_structure(gene_id)
    individuals = family.get_individuals()
    indiv_ids = [i.indiv_id for i in individuals]
    num_individuals = len(indiv_ids)

    coding_regions = []
    for c in get_coding_regions_from_gene_structure(gene_id, gene_structure):
        coding_region = {}
        coding_region['start'] = genomeloc.get_chr_pos(c.xstart)[1]
        coding_region['stop'] = genomeloc.get_chr_pos(c.xstop)[1]
        coding_region['gene_id'] = c.gene_id
        coding_region['size'] = c.xstop - c.xstart + 1
        coding_regions.append(coding_region)

    coverages = {}
    for individual in individuals:
        coverages[individual.indiv_id] = get_coverage_store().get_coverage_for_gene(
            str(individual.pk),
            gene['gene_id']
        )

    whole_gene = Counter({'callable': 0, 'low_coverage': 0, 'poor_mapping': 0})
    for coverage_spec in coverages.values():
        gene_totals = coverage_spec['gene_totals']
        whole_gene['callable'] += gene_totals['callable']
        whole_gene['low_coverage'] += gene_totals['low_coverage']
        whole_gene['poor_mapping'] += gene_totals['poor_mapping']

    gene_coding_size = 0
    for c in coding_regions:
        gene_coding_size += c['stop'] - c['start'] + 1

    totalsize = gene_coding_size * num_individuals
    if totalsize:
        denominator = float(totalsize)
        whole_gene['ratio_callable'] = whole_gene['callable'] / denominator
        whole_gene['ratio_low_coverage'] = whole_gene['low_coverage'] / denominator
        whole_gene['ratio_poor_mapping'] = whole_gene['poor_mapping'] / denominator
    else:
        # No individuals or no coding bases: dividing would raise
        # ZeroDivisionError, so report empty ratios instead.
        whole_gene['ratio_callable'] = 0.0
        whole_gene['ratio_low_coverage'] = 0.0
        whole_gene['ratio_poor_mapping'] = 0.0
    whole_gene['gene_coding_size'] = gene_coding_size

    return render(request, 'coverage/family_coverage_gene.html', {
        'project': family.project,
        'family': family,
        'gene': gene,
        'coverages_json': json.dumps(coverages),
        'whole_gene_json': json.dumps(whole_gene),
        'coding_regions_json': json.dumps(coding_regions),
        'indiv_ids_json': json.dumps(indiv_ids),
        'individuals': individuals,
        'whole_gene': whole_gene,
    })
def get_multiple_variants(self, project_id, family_id, xpos_ref_alt_tuples, user=None):
    """
    Get one or more specific variants in a family
    Variant should be identifiable by xpos, ref, and alt
    Note that ref and alt are just strings from the VCF (for now)

    Returns a list aligned with xpos_ref_alt_tuples: entry i is the variant
    matching tuple i, or None when no such variant came back from the
    elasticsearch query.
    """
    requested_ids = []
    for xpos, ref, alt in xpos_ref_alt_tuples:
        chrom, pos = get_chr_pos(xpos)
        requested_ids.append("%s-%s-%s-%s" % (chrom, pos, ref, alt))

    hits = self.get_elasticsearch_variants(
        project_id,
        family_id=family_id,
        variant_id_filter=requested_ids,
        user=user)

    # Index what was retrieved by (xpos, ref, alt) so the output can follow
    # the order of the input tuples.
    hits_by_key = {(hit.xpos, hit.ref, hit.alt): hit for hit in hits}

    # Same length as the input; None marks tuples absent from the index.
    return [hits_by_key.get(key) for key in xpos_ref_alt_tuples]
def get_single_variant(self, project_id, family_id, xpos, ref, alt, user=None):
    """Fetch the single variant identified by xpos, ref and alt in a family.

    Returns:
        The matching variant, or None when the query yields nothing.

    Raises:
        ValueError: if the query yields more than one record.
    """
    chrom, pos = get_chr_pos(xpos)
    variant_id = "%s-%s-%s-%s" % (chrom, pos, ref, alt)

    matches = list(self.get_elasticsearch_variants(
        project_id,
        family_id=family_id,
        variant_id_filter=[variant_id],
        user=user,
        include_all_consequences=True))

    if len(matches) > 1:
        dumped = "\n".join([pformat(v.toJSON()) for v in matches])
        raise ValueError(
            "Multiple variant records found for project: %s family: %s %s-%s-%s-%s: \n %s"
            % (project_id, family_id, chrom, pos, ref, alt, dumped))

    return matches[0] if matches else None
def get_multiple_variants(self, project_id, family_id, xpos_ref_alt_tuples, user=None):
    """
    Get one or more specific variants in a family
    Variant should be identifiable by xpos, ref, and alt
    Note that ref and alt are just strings from the VCF (for now)

    Chromosome 'M' is written as 'MT' in the variant ids sent to the query.
    Returns a list aligned with xpos_ref_alt_tuples, with None for tuples
    that were not found.
    """
    id_filter = []
    for xpos, ref, alt in xpos_ref_alt_tuples:
        chrom, pos = get_chr_pos(xpos)
        chrom = 'MT' if chrom == 'M' else chrom
        id_filter.append("%s-%s-%s-%s" % (chrom, pos, ref, alt))

    found = self.get_elasticsearch_variants(
        project_id, family_id=family_id, variant_id_filter=id_filter, user=user)

    # Key the retrieved records by (xpos, ref, alt) so they can be returned
    # in the caller's order.
    by_key = {}
    for record in found:
        by_key[(record.xpos, record.ref, record.alt)] = record

    # One output slot per input tuple; None where elasticsearch had no match.
    ordered = []
    for key in xpos_ref_alt_tuples:
        ordered.append(by_key.get(key))
    return ordered
def __init__(self, xpos, ref, alt):
    """Create a variant from its xpos coordinate and ref/alt alleles.

    Args:
        xpos: combined chromosome/position coordinate understood by
            ``genomeloc.get_chr_pos``.
        ref: reference allele string.
        alt: alternate allele string.

    Besides storing the inputs, this derives ``xposx`` (xpos shifted
    according to the allele lengths), ``chr``/``pos``, ``pos_end`` and
    ``vartype``, and initialises the annotation-related fields to empty
    values.
    """
    self.xpos = xpos
    self.ref = ref
    self.alt = alt

    # TODO: should be implemented in genomeloc.py
    ref_len = len(ref)
    alt_len = len(alt)
    self.xposx = xpos
    if ref_len == 1 and alt_len > 1:
        # insertion
        self.xposx += alt_len - 1
    elif ref_len > 1 and alt_len == 1:
        # deletion
        self.xposx -= 1
    elif ref_len > 1 and alt_len > 1:
        # multi base sub
        self.xposx += alt_len - 1

    chrom, pos = genomeloc.get_chr_pos(self.xpos)
    self.chr = chrom
    self.pos = pos
    # Use an integer modulus: `% 1e9` silently turned pos_end into a float
    # (e.g. 12345.0) even though xposx is integral.
    # NOTE(review): presumably the low 9 decimal digits of xpos hold the
    # position -- confirm against genomeloc.
    self.pos_end = self.xposx % int(1e9)

    # TODO: feels like this should be an ordered dict
    self.genotypes = {}
    self.extras = {}
    self.annotation = None
    self.gene_ids = None
    self.coding_gene_ids = None
    self.vcf_id = None
    self.vartype = 'snp' if ref_len == 1 and alt_len == 1 else 'indel'
def toJSON(self):
    """Serialize the populated filter fields into a plain dict.

    Attributes with a falsy value are omitted. When ``self.locations`` is
    truthy, each (start, end) pair is rendered as ``"chrom:start-end"``
    using the chromosome of the start coordinate.
    """
    d = {}
    for key in (
        'variant_types',
        'so_annotations',
        'ref_freqs',
        'annotations',
        'genes',
        'exclude_genes',
    ):
        value = getattr(self, key)
        if value:
            d[key] = value

    if self.locations:
        locations = []
        for loc_start, loc_end in self.locations:
            # Decode each endpoint once instead of re-running get_chr_pos on
            # the start coordinate for every field.
            chrom, start = genomeloc.get_chr_pos(loc_start)
            end = genomeloc.get_chr_pos(loc_end)[1]
            locations.append("%s:%s-%s" % (chrom, start, end))
        d['locations'] = locations
    return d
def get_multiple_variants(self, project_id, family_id, xpos_ref_alt_tuples):
    """
    Get one or more specific variants in a family
    Variant should be identifiable by xpos, ref, and alt
    Note that ref and alt are just strings from the VCF (for now)

    Results are memoized in self._results_cache under
    (project_id, family_id, tuple of requested keys).

    Returns a list the same length as xpos_ref_alt_tuples, holding the
    matching variant for each entry or None when it was not found.
    """
    # Normalise up front: every entry becomes a hashable tuple (so lists are
    # accepted too) and a one-shot iterable can safely be walked twice.
    lookup_keys = [tuple(t) for t in xpos_ref_alt_tuples]
    cache_key = (project_id, family_id, tuple(lookup_keys))

    # Check the cache before doing any work; the variant ids are only needed
    # when the query actually has to run.
    if cache_key in self._results_cache:
        return self._results_cache[cache_key]

    variant_ids = []
    for xpos, ref, alt in lookup_keys:
        chrom, pos = get_chr_pos(xpos)
        variant_ids.append("%s-%s-%s-%s" % (chrom, pos, ref, alt))

    # make sure all variants in xpos_ref_alt_tuples were retrieved and are in the same order.
    # Return None for tuples that weren't found in ES.
    results_by_xpos_ref_alt = {}
    for r in self.get_elasticsearch_variants(
            project_id, family_id=family_id, variant_id_filter=variant_ids):
        results_by_xpos_ref_alt[(r.xpos, r.ref, r.alt)] = r

    results = [results_by_xpos_ref_alt.get(key) for key in lookup_keys]
    self._results_cache[cache_key] = results
    return results
def get_single_variant(self, project_id, family_id, xpos, ref, alt):
    """Fetch the single variant identified by xpos, ref and alt in a family.

    The raw query result list is memoized in self._results_cache under
    (project_id, family_id, xpos, ref, alt).

    Returns:
        The matching variant, or None when nothing was found.

    Raises:
        ValueError: if more than one record matches.
    """
    chrom, pos = get_chr_pos(xpos)
    variant_id = "%s-%s-%s-%s" % (chrom, pos, ref, alt)
    cache_key = (project_id, family_id, xpos, ref, alt)

    # Populate the cache on a miss, then read the entry back either way.
    if cache_key not in self._results_cache:
        fetched = list(self.get_elasticsearch_variants(
            project_id, family_id=family_id, variant_id_filter=[variant_id]))
        self._results_cache[cache_key] = fetched
        matches = fetched
    else:
        matches = self._results_cache[cache_key]

    if not matches:
        return None

    if len(matches) > 1:
        dumped = "\n".join([pformat(v.toJSON()) for v in matches])
        raise ValueError(
            "Multiple variant records found for project: %s family: %s %s-%s-%s-%s: \n %s"
            % (project_id, family_id, chrom, pos, ref, alt, dumped))

    return matches[0]
def toDict(self):
    """Return this breakpoint as a dict of plain values.

    The 'genes' entry lists, for every object in
    ``self.breakpointgene_set.all()``, its gene symbol and cds distance;
    'chr' and 'pos' come from decoding ``self.xpos``.
    """
    gene_entries = []
    for breakpoint_gene in self.breakpointgene_set.all():
        gene_entries.append({
            'gene': breakpoint_gene.gene_symbol,
            'cds_dist': breakpoint_gene.cds_dist,
        })

    chrom, position = genomeloc.get_chr_pos(self.xpos)

    return {
        'xpos': self.xpos,
        'chr': chrom,
        'pos': position,
        'obs': self.obs,
        'sample_count': self.sample_count,
        'consensus': self.consensus,
        'indiv_id': self.individual.indiv_id,
        'genes': gene_entries,
    }
def toList(self):
    """Return this breakpoint as a positional list of plain values.

    Order: xpos, chromosome, position, obs, sample_count, consensus,
    partner, individual id, genes (one {'gene', 'cds_dist'} dict per object
    in ``self.breakpointgene_set.all()``).
    """
    gene_entries = []
    for breakpoint_gene in self.breakpointgene_set.all():
        gene_entries.append({
            'gene': breakpoint_gene.gene_symbol,
            'cds_dist': breakpoint_gene.cds_dist,
        })

    chrom, position = genomeloc.get_chr_pos(self.xpos)

    row = [self.xpos, chrom, position]
    row.append(self.obs)
    row.append(self.sample_count)
    row.append(self.consensus)
    row.append(self.partner)
    row.append(self.individual.indiv_id)
    row.append(gene_entries)
    return row
def toDict(self):
    """Return a dict representation of this breakpoint.

    'chr' and 'pos' are decoded from ``self.xpos``; 'genes' holds one
    {'gene', 'cds_dist'} dict per object in
    ``self.breakpointgene_set.all()``.
    """
    related_genes = self.breakpointgene_set.all()
    gene_summaries = [
        {'gene': related.gene_symbol, 'cds_dist': related.cds_dist}
        for related in related_genes
    ]

    chrom, position = genomeloc.get_chr_pos(self.xpos)

    as_dict = {}
    as_dict['xpos'] = self.xpos
    as_dict['chr'] = chrom
    as_dict['pos'] = position
    as_dict['obs'] = self.obs
    as_dict['sample_count'] = self.sample_count
    as_dict['consensus'] = self.consensus
    as_dict['indiv_id'] = self.individual.indiv_id
    as_dict['genes'] = gene_summaries
    return as_dict
def get_single_variant(self, project_id, family_id, xpos, ref, alt, user=None):
    """Fetch the single variant identified by xpos, ref and alt in a family.

    Chromosome 'M' is written as 'MT' in the variant id sent to the query
    (and in the error message).

    Returns:
        The matching variant, or None when the query yields nothing.

    Raises:
        ValueError: if the query yields more than one record.
    """
    chrom, pos = get_chr_pos(xpos)
    chrom = 'MT' if chrom == 'M' else chrom
    variant_id = "%s-%s-%s-%s" % (chrom, pos, ref, alt)

    query = self.get_elasticsearch_variants(
        project_id,
        family_id=family_id,
        variant_id_filter=[variant_id],
        user=user,
        include_all_consequences=True)
    matches = list(query)

    if len(matches) > 1:
        record_dump = "\n".join([pformat(v.toJSON()) for v in matches])
        raise ValueError(
            "Multiple variant records found for project: %s family: %s %s-%s-%s-%s: \n %s"
            % (project_id, family_id, chrom, pos, ref, alt, record_dump))

    if matches:
        return matches[0]
    return None
def toList(self):
    """Return a positional list representation of this breakpoint.

    Order: xpos, chromosome, position, obs, sample_count, consensus,
    partner, individual id, genes (one {'gene', 'cds_dist'} dict per object
    in ``self.breakpointgene_set.all()``).
    """
    related_genes = self.breakpointgene_set.all()
    gene_summaries = [
        {'gene': related.gene_symbol, 'cds_dist': related.cds_dist}
        for related in related_genes
    ]

    chrom, position = genomeloc.get_chr_pos(self.xpos)

    return [
        self.xpos,
        chrom,
        position,
        self.obs,
        self.sample_count,
        self.consensus,
        self.partner,
        self.individual.indiv_id,
        gene_summaries,
    ]