def gene_symb_to_probe_id(symbol_list):
    '''
    -look up the probe ids (affy U133A) annotated with the given gene symbols
    -symbol_list: list of gene symbols, used as the '$in' filter on pr_gene_symbol
    -returns a list of the pr_id values found, records without a pr_id left out
    '''
    mc = mu.MongoContainer()
    query = {'pr_gene_symbol': {'$in': symbol_list}}
    fields = {'pr_id': True, 'pr_gene_symbol': True}
    annot = mc.gene_info.find(query, fields, toDataFrame=True)
    # keep only the records that actually carry a probe id
    has_probe = annot.pr_id.notnull()
    return list(annot[has_probe].pr_id.values)
def get_dos_BRDs():
    '''
    -return a Series of pert_ids for DOS compounds, using the DOS icollection
    -every pert_info record whose pert_icollection is 'DOS' is fetched, then
     only records whose pert_iname contains 'BRD' are kept (presumably these
     are compounds whose name is still a BRD identifier, i.e. no known name)
    -returns the pert_id column of the kept records
    '''
    # get all cps from the DOS collection
    mc = mu.MongoContainer()
    pertInfo = mc.pert_info.find({'pert_icollection': 'DOS'},
                                 {}, toDataFrame=True)
    # Build the name mask once; only the "contains BRD" side feeds the result.
    # NOTE(review): this matches 'BRD' anywhere in pert_iname, not only at the
    # start - confirm that is the intended test.
    isBRDiname = pertInfo['pert_iname'].str.contains('BRD')
    return pertInfo[isBRDiname]['pert_id']
def probe_id_to_gene_symb(inFile, outFile):
    '''
    -rewrite a gct so that its first column holds gene symbols, not probe_ids
    -inFile: path of the tab-delimited gct to read
    -outFile: path the relabelled gct is written to
    '''
    # the first two lines of the file are skipped; column 0 becomes the index
    expr = pd.read_csv(inFile, sep='\t', skiprows=[0, 1], index_col=0)
    probes = expr.index.values

    mc = mu.MongoContainer()
    query = {'pr_id': {'$in': list(probes)}}
    fields = {'pr_id': True, 'pr_gene_symbol': True}
    annot = mc.gene_info.find(query, fields, toDataFrame=True)

    # line the annotation up with the matrix rows, in matrix order
    annot = annot.set_index('pr_id', drop=False).reindex(probes)
    expr.index = annot.pr_gene_symbol.values
    expr.index.name = 'Name'
    expr.to_csv(outFile, sep='\t')

    # put the two header lines back (line_pre_adder presumably prepends a
    # line, so the dimensions go in first and the version tag ends up on top)
    n_rows, n_cols = expr.shape
    line_pre_adder(outFile, '\t'.join([str(n_rows), str(n_cols - 1)]))
    line_pre_adder(outFile, "#1.2")
##grab anything that appeared on a DOS plate # CM = mu.CMapMongo() # dosQuery = CM.find({'sig_id':{'$regex':'DOS'},'pert_type':'trt_cp'}, #, # {'sig_id':True,'pert_id':True,'cell_id':True,'pert_time':True,'is_gold':True,'pert_iname':True,'distil_ss':True,'distil_cc_q75':True}, # toDataFrame=True) # potentialDos = set(dosQuery['pert_id']) # #check the pert_collection status of each compounds # mc = mu.MongoContainer() # pertInfo = mc.pert_info.find({'pert_id':{'$in':list(potentialDos)}}, # {},toDataFrame=True) # collectionSets = set(pertInfo['pert_icollection']) #get all cps from DOS collection mc = mu.MongoContainer() pertInfo = mc.pert_info.find({'pert_icollection':'DOS'}, {},toDataFrame=True) #check that it doesn't have a known pert_iname inameSer = pertInfo['pert_iname'] inameFrm = pd.DataFrame(inameSer) #which values do not start with BRD? notBRDiname = pertInfo[~inameSer.str.contains('BRD')] isBRDiname = pertInfo[inameSer.str.contains('BRD')] dosBrds = isBRDiname['pert_id'] ######################## ## all DOS signatures ## ######################## # get signatures for all DOS compounds