def geneInfoH(geneNameH, geneSetH,
              refSeqSummaryFileName='/Z/Sequence/ucsc_hg19/annot/refSeqSummary.txt',
              hugoFileName='/Z/Sequence/geneinfo/hugo.txt',
              censusFileName='/Z/Sequence/geneinfo/cancer_gene_census.txt',
              biocartaFileName='/Z/Sequence/geneinfo/BIOCARTA.gmt',
              goFileName='/Z/Sequence/geneinfo/hugo.txt',
              keggFileName='/Z/Sequence/geneinfo/hugo.txt'):
    """Build a per-gene annotation dictionary from several tab-separated files.

    Args:
        geneNameH: mapping from RefSeq id to gene name; only RefSeq ids present
            here contribute a 'summary' entry.
        geneSetH: mapping ``{geneSetDB: {geneSetName: (geneSetDesc, geneNameL)}}``
            where ``geneNameL`` is an iterable of gene names.
        refSeqSummaryFileName: file with exactly three tab-separated columns
            per line (refSeqId, status, summary).
        hugoFileName: file with exactly five tab-separated columns per line
            (geneName, desc, aliases, geneCardName, refSeqIds).
        censusFileName: file with at least 14 tab-separated columns per line;
            columns 0, 1, 7, 8, 12 and 13 are read.  Rows whose first column
            is 'Symbol' (the header) are skipped.
        biocartaFileName, goFileName, keggFileName: accepted for backward
            compatibility; not read by this function.

    Returns:
        dict mapping gene name to a dict that may contain the keys 'summary',
        'desc', 'aliases', 'refSeqIds', 'census_somatic', 'census_germline',
        'census_mutType', 'census_translocPartners', plus one key per gene-set
        database holding ``(geneSetName, geneSetDesc)`` tuples.

    Raises:
        ValueError: if a refSeq-summary or hugo line does not have the
            expected number of columns.
        IndexError: if a census line has fewer than 14 columns.
    """
    infoH = {}

    # Only the line terminator is stripped: rstrip('\n') keeps trailing empty
    # fields intact and, unlike line[:-1], does not eat the last character of
    # a final line that has no newline.
    with open(refSeqSummaryFileName) as refSeqFile:
        for line in refSeqFile:
            (refSeqId, status, summary) = line.rstrip('\n').split('\t')
            if refSeqId in geneNameH:
                infoH.setdefault(geneNameH[refSeqId], {})['summary'] = summary

    with open(hugoFileName) as hugoFile:
        for line in hugoFile:
            (geneName, desc, aliases, geneCardName, refSeqIds) = line.rstrip('\n').split('\t')
            entry = infoH.setdefault(geneName, {})
            entry['desc'] = desc
            entry['aliases'] = aliases
            entry['refSeqIds'] = refSeqIds

    with open(censusFileName) as censusFile:
        for line in censusFile:
            tokL = line.rstrip('\n').split('\t')
            (geneName, desc, somatic, germline, mutType, translocPartners) = \
                (tokL[0], tokL[1], tokL[7], tokL[8], tokL[12], tokL[13])
            if geneName == 'Symbol':
                continue  # header row
            # The census description is used only for genes not seen before.
            entry = infoH.setdefault(geneName, {'desc': desc})
            entry['census_somatic'] = somatic
            entry['census_germline'] = germline
            entry['census_mutType'] = mutType
            entry['census_translocPartners'] = translocPartners

    for geneSetDB, geneSets in geneSetH.items():
        for geneSetName, (geneSetDesc, geneNameL) in geneSets.items():
            for geneName in geneNameL:
                if geneName in infoH:
                    # NOTE(review): jkbasic.addHash is defined elsewhere;
                    # presumably it appends the value to a list stored under
                    # the key - confirm against jkbasic.
                    jkbasic.addHash(infoH[geneName], geneSetDB, (geneSetName, geneSetDesc))
                else:
                    infoH[geneName] = {geneSetDB: [(geneSetName, geneSetDesc)]}

    return infoH
def loadRefFlatByGeneName(refFlatFileName='refFlat.txt'):
    """Load a refFlat file into a dict keyed by each record's 'transName'.

    Every line is parsed with processBlatLine and the resulting record is
    added to the result under ``record['transName']`` via jkbasic.addHash.

    NOTE(review): despite the function name, records are keyed by 'transName'
    and parsed with processBlatLine - confirm this is intended.
    NOTE(review): jkbasic.addHash presumably collects values sharing a key
    into a list - confirm against jkbasic.

    Args:
        refFlatFileName: path of the file to read.

    Returns:
        dict populated by jkbasic.addHash; empty for an empty file.
    """
    recordsByName = {}
    # The context manager closes the handle deterministically, including when
    # parsing raises part-way through the file.
    with open(refFlatFileName) as refFlatFile:
        for line in refFlatFile:
            record = processBlatLine(line)
            jkbasic.addHash(recordsByName, record['transName'], record)
    return recordsByName
def loadRefFlatByChr(refFlatFileName='/%s/D/Sequences/hg19/refFlat_hg19.txt' % (homedir,)):
    """Load a refFlat file into a dict keyed by each record's 'chrom'.

    Every line is parsed with processBlatLine and the resulting record is
    added to the result under ``record['chrom']`` via jkbasic.addHash.

    The default path is built from the module-level ``homedir`` once, when
    this function is defined.

    NOTE(review): jkbasic.addHash presumably collects values sharing a key
    into a list - confirm against jkbasic.

    Args:
        refFlatFileName: path of the file to read.

    Returns:
        dict populated by jkbasic.addHash; empty for an empty file.
    """
    recordsByChrom = {}
    # The context manager closes the handle deterministically, including when
    # parsing raises part-way through the file.
    with open(refFlatFileName) as refFlatFile:
        for line in refFlatFile:
            record = processBlatLine(line)
            jkbasic.addHash(recordsByChrom, record['chrom'], record)
    return recordsByChrom