def main(self): self.ofd.write(IHDR) # tid2parents = {} for e in db.sql(QEDGES): l = e["label"] if l == "is-a": rel = "is_a: %(pid)s ! %(parent)s\n" % e else: rel = "relationship: part_of %(pid)s ! %(parent)s\n" % e tid2parents.setdefault(e["_term_key"], []).append(rel) # tid2synonyms = {} for s in db.sql(QSYNONYMS): syn = 'synonym: "%(synonym)s" RELATED []\n' % s tid2synonyms.setdefault(s["_term_key"], []).append(syn) # tid2altids = {} for a in db.sql(QALTIDS): aid = "alt_id: %(id)s\n" % a tid2altids.setdefault(a["_term_key"], []).append(aid) # for r in db.sql(QTERMS): r["relationships"] = "".join(tid2parents.get(r["_term_key"], [])) r["synonyms"] = "".join(tid2synonyms.get(r["_term_key"], [])) r["altids"] = "".join(tid2altids.get(r["_term_key"], [])) self.ofd.write(ITERM % r) # self.ofd.write(ITAIL) # self.ofd.close()
def cacheGeneIds(): geneIds = set() query = ''' SELECT aa.accid FROM ACC_Accession aa, MRK_MCV_Cache mm WHERE aa._mgitype_key = 2 AND aa.private = 0 AND aa._object_key = mm._marker_key AND mm.term = 'gene' AND aa._logicaldb_key in (1,13,59,60,85,131,132,133,134) ''' db.sql(query, lambda r: geneIds.add(r['accid'])) return geneIds
def loadMgiTypeKeys(self): tkeys = {} q = ''' SELECT _mgitype_key, name FROM ACC_MGIType ''' for r in db.sql(q): tkeys[r['name']] = r['_mgitype_key'] return tkeys
def uploadIds( ids, conn, tname="_ids", colname="id", index="_ids_ix_0", chunksize=500): db.sql(('create temporary table %s (%s varchar(255))' % (tname,colname)), None, conn) for i in xrange(0,len(ids),chunksize): cmds = [] for id in ids[i:i+chunksize]: cmds.append("insert into %s values ('%s')" % (tname,id)) db.sql(cmds,'ignore',conn) if index: db.sql(('create unique index %s on %s(%s)' % (index, tname, colname)), None, conn) return tname
def iterNotes( **kwargs ): note = None# current note to yield qry = buildQuery( ** kwargs ) notechunks = db.sql( qry ) for nc in notechunks: if nc['sequencenum'] == 1: if note: note['note'] = note['note'].strip() yield note note = nc else: note['note'] += nc['note'] note['sequencenum'] = nc['sequencenum'] if note: note['note'] = note['note'].strip() yield note
def loadUnciteablePubs(self): q = '''select br._Refs_key as _refs_key from BIB_Refs br, ACC_Accession acc where br._Refs_key = acc._Object_key and acc._MGIType_key = 1 and acc.prefixPart = 'J:' and acc._LogicalDB_key = 1 and ( br.journal ilike 'database%' or br.journal ilike 'personal%' or br.journal ilike 'Genbank Submission' or br.refType = 'BOOK' or br.journal ilike '%Data Submission%' or ( br.journal is null and br._ReviewStatus_key = 2) )''' for r in db.sql(q): self.unciteablePubs[r['_refs_key']] = 1;
def getSeqData(ids): # create db connection conn = db.connect() # # Create temp table of MGI ids idTbl = uploadIds(ids, conn, tname="_ids") # # Create temp table containing basic info for markers db.sql(''' create temporary table _markers ( _marker_key int, mgiid varchar(30), symbol varchar(255), name varchar(255), chromosome varchar(4), mgitype varchar(50), mcvtype varchar(255) ) ''',None,conn) db.sql(''' insert into _markers select m._marker_key, a.accid as "mgiid", m.symbol, m.name, m.chromosome, t.name as "mgitype", v.term as "mcvtype" from MRK_Marker m, MRK_Types t, ACC_Accession a, MRK_MCV_Cache v where m._Organism_key = 1 and m._Marker_Type_key = t._Marker_Type_key and m._Marker_key = a._Object_key and a._MGIType_key = 2 and a._LogicalDB_key = 1 and a.prefixPart = 'MGI:' and a.preferred = 1 and a.accID in (select id from %s) and m._Marker_key = v._Marker_key and v.qualifier = 'D' '''%idTbl, None, conn) db.sql(''' create index idx0 on _markers(_marker_key) ''', None, conn) # # Temp table of sequences (keys) that are associated with only 1 marker db.sql(''' create temporary table _seq1marker (_sequence_key int) ''', None, conn) db.sql(''' insert into _seq1marker select _sequence_key from SEQ_Marker_Cache group by _Sequence_key having count(*) = 1 ''',None, conn) db.sql(''' create index s1m_idx1 on _seq1marker(_sequence_key) ''', None, conn) # # results = db.sql(''' select m._marker_key, m.mgiid, m.symbol, m.name, m.chromosome, m.mgitype, m.mcvtype, c.accid as "seqid", s.length, t.term as "seqtype" from _markers m, SEQ_Marker_Cache c, SEQ_Sequence s, VOC_Term t where c._Marker_key = m._marker_key and c._Sequence_key = s._Sequence_key and c._LogicalDB_key in (9,27) /* (9,27,35,36,53) ??*/ and s._Sequence_key in (select _Sequence_key from _seq1marker) and s._SequenceType_key = t._Term_key order by m.mgiid, c.accid ''', None, conn) # conn.close() # return results
# # getSOterms.py # # import sys import mgiadhoc as db SO_vocab_key = 138 SO_logicaldb_key = 145 SO_terms = ''' SELECT t.term, a.accid FROM VOC_Term t, ACC_ACCESSION a WHERE t._vocab_key = %d AND t._term_key = a._object_key AND a._logicaldb_key = %d AND a.preferred = 1 ''' % (SO_vocab_key, SO_logicaldb_key) for r in db.sql(SO_terms): print(r['accid'] + '\t' + r['term'])
def doQ(con, q): r = db.sql( q, connection=con ) r.sort() return r
def sql(self, q, p=None, args={}): self.log(str(q)) return db.sql(q, p, args=args)