def test_get_metrics(self):
    # The assessment-metrics endpoint should answer OK for a variant
    # that has just been stored.
    variant = Variant(
        chr=self.chromo,
        pos=1,
        ref="jon",
        alt="ba",
        md5="kid",
        type=self.varianttype,
    )
    variant.save()

    metrics_url = '/api/variants/{0}/assessment-metrics/'.format(variant.id)
    response = self.client.get(metrics_url, HTTP_ACCEPT='application/json')

    # Only the status code is checked here, not the payload.
    self.assertEqual(response.status_code, codes.ok)
def test_get_all(self):
    # Saving one variant must grow the table by exactly one row, and the
    # detail endpoint for that variant must answer OK.
    total_before = Variant.objects.count()

    variant = Variant(
        chr=self.chromo,
        pos=1,
        ref="jon",
        alt="ba",
        md5="kid",
        type=self.varianttype,
    )
    variant.save()

    # Counted before the request is issued, right after the save.
    total_after = Variant.objects.count()

    # Fetch the freshly stored variant by its id.
    detail_url = '/api/variants/{0}/'.format(variant.id)
    response = self.client.get(detail_url, HTTP_ACCEPT='application/json')

    self.assertEqual(response.status_code, codes.ok)
    self.assertEqual(total_after, total_before + 1)
def get_variant(self, record):
    """Get or create a variant.

    The record's chromosome, position, reference and alternate alleles
    are hashed with ``calculate_md5`` and the digest is looked up in
    ``self.variant_cache`` (after ``self.check_cache`` has been called
    for the chromosome).

    * Cache hit: a bare ``Variant`` carrying only the cached primary key
      is returned; nothing is saved.
    * Cache miss: a new ``Variant`` is populated, saved, counted in
      ``self.file_variants`` and added to the cache. If the record's
      ``INFO`` has an ``EFF`` entry, its comma-separated items are
      passed to ``self.load_effects``.

    ``record`` must expose ``CHROM``, ``POS``, ``REF``, ``ALT``, ``ID``,
    ``INFO`` and ``var_type``.
    """
    chrom, pos, ref = record.CHROM, record.POS, record.REF
    alt = '/'.join(str(x) for x in record.ALT)

    # Calculate MD5 and attempt to fetch the primary key from
    # the local cache, otherwise use it when inserting.
    md5 = calculate_md5(chrom, pos, ref, alt)

    # Ensure the cache is valid for the chromosome
    self.check_cache(chrom)

    variant_id = self.variant_cache.get(md5, None)

    # Already known: a faux instance holding just the pk is enough.
    if variant_id:
        return Variant(pk=variant_id)

    # Create if it does not exist
    variant = Variant(pos=pos, ref=ref, alt=alt, md5=md5)

    # Update foreign key references
    variant.chr = self.get_chromosome(chrom)
    variant.type = self.get_variant_type(record.var_type.upper())

    # Periods are useless, store them as None. The former
    # ``cond and None or value`` idiom could never yield None because
    # None is falsy, so '.' ended up being stored as the rsid.
    variant.rsid = None if record.ID == '.' else record.ID
    variant.save()
    self.file_variants += 1

    # Update cache
    self.variant_cache[md5] = variant.pk

    # Process SNPEff data only now, i.e. the first time this variant
    # has been seen.
    if 'EFF' in record.INFO:
        effs = record.INFO['EFF'].split(',')
        self.load_effects(effs, variant)

    return variant
def get_variant(self, record):
    """Get or create a variant.

    The record's chromosome, position, reference and alternate alleles
    are hashed with ``calculate_md5`` and the digest is looked up in
    ``self.variant_cache`` (after ``self.check_cache`` has been called
    for the chromosome).

    * Cache hit: a bare ``Variant`` carrying only the cached primary key
      is returned; nothing is saved.
    * Cache miss: a new ``Variant`` is populated, saved, counted in
      ``self.file_variants`` and added to the cache. If the record's
      ``INFO`` has an ``EFF`` entry, its comma-separated items are
      passed to ``self.load_effects``.

    ``record`` must expose ``CHROM``, ``POS``, ``REF``, ``ALT``, ``ID``,
    ``INFO`` and ``var_type``.
    """
    chrom, pos, ref = record.CHROM, record.POS, record.REF
    alt = '/'.join(str(x) for x in record.ALT)

    # Calculate MD5 and attempt to fetch the primary key from
    # the local cache, otherwise use it when inserting.
    md5 = calculate_md5(chrom, pos, ref, alt)

    # Ensure the cache is valid for the chromosome
    self.check_cache(chrom)

    variant_id = self.variant_cache.get(md5, None)

    # Already known: a faux instance holding just the pk is enough.
    if variant_id:
        return Variant(pk=variant_id)

    # Create if it does not exist
    variant = Variant(pos=pos, ref=ref, alt=alt, md5=md5)

    # Update foreign key references
    variant.chr = self.get_chromosome(chrom)
    variant.type = self.get_variant_type(record.var_type.upper())

    # Periods are useless, store them as None. The former
    # ``cond and None or value`` idiom could never yield None because
    # None is falsy, so '.' ended up being stored as the rsid.
    variant.rsid = None if record.ID == '.' else record.ID
    variant.save()
    self.file_variants += 1

    # Update cache
    self.variant_cache[md5] = variant.pk

    # Process SNPEff data only now, i.e. the first time this variant
    # has been seen.
    if 'EFF' in record.INFO:
        effs = record.INFO['EFF'].split(',')
        self.load_effects(effs, variant)

    return variant
def load_hgmd_snp(self, cursor, using=None):
    """Load HGMD SNP associations from the raw HGMD tables.

    Runs one query joining ``raw.hgmd_mutation`` and
    ``raw.hgmd_hg19_coords`` against the chromosome, variant,
    variant_type, phenotype, pubmed and gene tables. Only rows whose
    variant type is ``'SNP'``, whose disease does not end in ``?`` and
    which matched both a variant and a gene are selected.

    For every selected row this method:

    * gets or creates a ``PubMed`` (rows with an unusable PMID get none),
    * gets or creates a ``Phenotype`` for the disease term,
    * gets or creates the ``GenePhenotype`` / ``VariantPhenotype`` links
      and stamps them with the HGMD accession number,
    * when a ``PubMed`` is available, adds it to the articles of the
      phenotype, the variant and the gene.

    Progress is echoed to stdout as a running row count.

    :param cursor: object providing ``execute()`` and ``fetchmany()``.
    :param using: value assigned to ``_state.db`` on the instances that
        are built from primary keys returned by the query. Newly created
        ``PubMed`` and ``Phenotype`` objects are saved with a plain
        ``save()``.
    :returns: the number of rows processed.
    """
    # The SQL is assembled from adjacent literals; the resulting text is
    # the same as before. The regex fragment is a raw string so that
    # ``\s`` and ``\?`` are not treated as (invalid) Python escapes.
    # NOTE(review): execute() receives no parameters, so whether '%%'
    # reaches the server as one or two percent signs depends on the
    # cursor implementation -- confirm.
    query = (
        " select distinct cl.acc_num as hgmd_id,"
        " c.value as chr, c.id as chr_id,"
        " v.id as variant_id,"
        " trim(both from cl.disease) as phenotype,"
        " ph.id as phenotype_id,"
        " cl.gene as gene, g.id as gene_id,"
        " cl.pmid as pubmed, pm.pmid as pubmed_id"
        " from ("
        " select m.acc_num, m.disease, m.gene, m.pmid, c.chromosome,"
        ' c."coordSTART" as pos'
        " from raw.hgmd_mutation m"
        " inner join raw.hgmd_hg19_coords c on (m.acc_num = c.acc_num)"
        " ) cl"
        " left outer join chromosome c on (cl.chromosome = c.value)"
        " left outer join variant v"
        " on (c.id = v.chr_id and cl.pos = v.pos)"
        " left outer join variant_type vt on (v.type_id = vt.id)"
        " left outer join phenotype ph"
        " on (lower(trim(both from"
        r" regexp_replace(cl.disease, '\s*\?$', '')))"
        " = lower(ph.term))"
        " left outer join pubmed pm"
        " on (cl.pmid::varchar = pm.pmid::varchar)"
        " left outer join gene g on (cl.gene::text = g.symbol::text)"
        " left outer join variant_phenotype vp on (vp.variant_id = v.id)"
        " where vt.value = 'SNP'"
        " and cl.disease not like '%%?'"
        " and v.id is not null and g.id is not null"
        " order by c.id "
    )
    cursor.execute(query)

    # Column names, in the order of the select list above.
    keys = ['hgmd_id', 'chr', 'chr_id', 'variant_id',
            'phenotype', 'phenotype_id', 'gene', 'gene_id',
            'pubmed', 'pubmed_id']

    count = 0
    # Objects created during this run, so that a later row referring to
    # the same PMID / term reuses them instead of creating duplicates.
    new_pubmed_map = {}
    new_phenotype_map = {}

    chrs = dict(Chromosome.objects.values_list('value', 'id'))

    while True:
        rows = cursor.fetchmany(100)
        if not rows:
            break

        for row in rows:
            record = dict(zip(keys, row))
            raw_pmid = record['pubmed']

            # Get or create a pubmed record
            if record['pubmed_id']:
                pubmed = PubMed(pmid=record['pubmed_id'])
                pubmed._state.db = using
            # Some records have a bogus PMID. Only process the valid
            # ones. A NULL PMID counts as bogus rather than raising.
            elif isinstance(raw_pmid, int) or (
                    raw_pmid is not None and raw_pmid.isdigit()):
                pmid = int(raw_pmid)
                if pmid in new_pubmed_map:
                    pubmed = new_pubmed_map[pmid]
                else:
                    pubmed = PubMed(pmid=pmid)
                    pubmed.save()
                    new_pubmed_map[pmid] = pubmed
            else:
                pubmed = None

            # Get or create the phenotype that the HGMD id will be
            # associated with.
            if record['phenotype_id']:
                phenotype = Phenotype(pk=record['phenotype_id'])
                phenotype._state.db = using
            else:
                term = record['phenotype']
                # Check newly added objects
                if term in new_phenotype_map:
                    phenotype = new_phenotype_map[term]
                else:
                    phenotype = Phenotype(term=term)
                    phenotype.save()
                    new_phenotype_map[term] = phenotype

            # NOTE(review): _chr is built but not used further down;
            # kept because the lookup raises KeyError for an unknown
            # chromosome value -- confirm whether it can be dropped.
            _chr = Chromosome(pk=chrs[record['chr']])
            _chr._state.db = using

            if record['gene_id']:
                gene = Gene(pk=record['gene_id'])
                gene._state.db = using

                try:
                    gp = GenePhenotype.objects.get(gene=gene,
                                                   phenotype=phenotype)
                except GenePhenotype.DoesNotExist:
                    gp = GenePhenotype(gene=gene, phenotype=phenotype)
                # Stamp the accession on new and existing links alike.
                gp.hgmd_id = record['hgmd_id']
                gp.save()
            else:
                gene = None

            if record['variant_id']:
                variant = Variant(pk=record['variant_id'])
                variant._state.db = using

                try:
                    vp = VariantPhenotype.objects.get(variant=variant,
                                                      phenotype=phenotype)
                except VariantPhenotype.DoesNotExist:
                    vp = VariantPhenotype(variant=variant,
                                          phenotype=phenotype)
                # Stamp the accession on new and existing links alike.
                vp.hgmd_id = record['hgmd_id']
                vp.save()
            else:
                variant = None

            # Articles can only be linked when there is a usable PMID.
            if pubmed:
                phenotype.articles.add(pubmed)
                if variant:
                    variant.articles.add(pubmed)
                if gene:
                    gene.articles.add(pubmed)

            count += 1
            # Carriage return keeps the running count on one line.
            sys.stdout.write('{0}\r'.format(count))
            sys.stdout.flush()

    return count