def makeReport() : #SELECT jc.var_id, COUNT( DISTINCT jc.pat_id ) #FROM `JointCalls` AS jc #INNER JOIN Variants AS v ON v.id = jc.var_id #INNER JOIN Isoforms AS i ON i.var_id = v.id #WHERE AF =0 #AND ( #functionGVS <> 'intron' #AND functionGVS <> 'near-gene-5' #AND functionGVS <> 'intergenic' #AND functionGVS <> 'near-gene-3' #AND functionGVS <> 'coding-synonymous' #AND functionGVS <> 'coding-notMod3' #) #AND ( #p_AA_AB > .9 #OR p_AA_AB > .9 #OR p_AB_BB > .9 #) #GROUP BY jc.var_id #HAVING COUNT( DISTINCT jc.pat_id ) >1 #ORDER BY COUNT( DISTINCT jc.pat_id ) DESC /*inner join Isoforms as i on i.var_id = jc.var_id WHERE p_AA_BB > .90 or p_AA_AB > .9*/ #LIMIT 0 , 30 fout = open("../output/joint.tsv",'w') csvout = csv.writer( fout, \ delimiter='\t', \ quoting=csv.QUOTE_MINIMAL ) extend = ["patient"] + headers[4:] reuse = queries.column_headers[:-5] csvout.writerow( reuse + extend ) conn = db.Conn("localhost") conn2 = db.Conn("localhost") call_where = "and (p_AA_AB > .9 or p_AA_AB > .9 or p_AB_BB > .9)" call_where = ''' and (normal_counts_a / (normal_counts_a + normal_counts_b) > .95 ) and normal_counts_a + normal_counts_b > 10''' query = ''' SELECT i.id, %s,%s,%s FROM `JointCalls` as jc inner join Variants as v on v.id = jc.var_id inner join Isoforms as i on i.var_id = v.id inner join Genes as g on i.gene_id = g.id where AF <= .0005 and (%s) %s and dbSNP is null and scorePhastCons >= .5 ORDER BY AF, jc.var_id, i.id''' % \ (queries.vcols_string, queries.icols_string, queries.gcols_string, \ queries.gvs, call_where) rows = conn.query( query ) last_iid = -1 for i, row in enumerate(rows) : print row outrow = queries.formatQueryRow(row,offset=1) iso_id = row[0] var_id = row[1] gt_lists = queries.getPatients( conn2, var_id, table='JointCalls', where_clause = call_where ) #print " ----------- " #print gt_lists if last_iid != iso_id : for gt,calls in gt_lists : for call in calls : pat_name = call[1] rest = call[2].split(',')[6:] together = outrow+[pat_name]+rest together_string = "".join([str(t) for t in together]) csvout.writerow( together ) last = together_string last_iid = iso_id else : print "dupe!" #if i > 20 : break #hets_string = ", ".join([t[1] for t in hets]) #homs_string = ", ".join([t[1] for t in homs]) #if patient in hets_string or patient in homs_string : #already_called += 1 #outrow.extend( ["-",len(homs),homs_string,len(hets),hets_string] ) fout.close()
inner join Genes as g on g.id = i.gene_id where chrom = %s and pos = %s and AF < .1 and (%s)''' % \ (queries.vcols_string, queries.icols_string, queries.gcols_string, \ chrom, pos, queries.gvs) rows = conn.query(query) num_rows = len(rows) if num_rows > 0 : hits += 1 total += 1 for row in rows : outrow = queries.formatQueryRow(row) (noinfs, hets, homs) = queries.getPatients( conn2, row[0] ) hets_string = ", ".join([t[1] for t in hets]) homs_string = ", ".join([t[1] for t in homs]) if patient in hets_string or patient in homs_string : already_called += 1 outrow.extend( ["-",len(homs),homs_string,len(hets),hets_string] ) csvout.writerow( outrow ) # hom and het shares print patient print "%d/%d hits/total" % (hits,total) print "%d/%d already_called/hits" % (already_called,hits) fin.close() fout.close()