Example #1
0
def gene_symb_to_probe_id(symbol_list):
    '''
    -given a list of gene symbols, return affy U133A probe ids

    symbol_list : list of gene symbols, matched against the
                  pr_gene_symbol field of gene_info
    returns     : list of the pr_id values of the matching records;
                  records whose pr_id is null are left out
    '''
    container = mu.MongoContainer()
    query = {'pr_gene_symbol': {'$in': symbol_list}}
    fields = {'pr_id': True, 'pr_gene_symbol': True}
    matches = container.gene_info.find(query, fields, toDataFrame=True)
    # discard records that carry no probe id
    has_probe = ~matches.pr_id.isnull()
    return list(matches[has_probe].pr_id.values)
Example #2
0
def get_dos_BRDs():
    '''
    -return Series of pert_ids for DOS compounds - use the DOS icollection

    Queries pert_info for every record whose pert_icollection is 'DOS'
    and keeps the ones whose pert_iname contains 'BRD' (anywhere in the
    name, not only at the start).
    NOTE(review): presumably an iname containing 'BRD' marks a compound
    without a known common name - confirm.

    Records with a missing pert_iname are treated as non-matching.
    '''
    mc = mu.MongoContainer()
    pertInfo = mc.pert_info.find({'pert_icollection':'DOS'},
                {},toDataFrame=True)
    # na=False: a null pert_iname would otherwise leave NaN in the mask,
    # which cannot be used for boolean indexing
    isBRD = pertInfo['pert_iname'].str.contains('BRD', na=False)
    dosBrds = pertInfo[isBRD]['pert_id']
    return dosBrds
Example #3
0
def probe_id_to_gene_symb(inFile, outFile):
    '''
    -rewrite a gct so that its first column holds gene symbols
     instead of probe_ids

    inFile  : path of the tab-delimited gct to read; its first two
              lines are skipped when parsing
    outFile : path the converted gct is written to
    '''
    # the first column (probe ids) becomes the index
    gct = pd.read_csv(inFile, sep='\t', skiprows=[0, 1], index_col=0)
    row_ids = gct.index.values

    container = mu.MongoContainer()
    query = {'pr_id': {'$in': list(row_ids)}}
    fields = {'pr_id': True, 'pr_gene_symbol': True}
    annot = container.gene_info.find(query, fields, toDataFrame=True)

    # line the annotation rows up with the matrix rows, in matrix order
    annot.index = annot.pr_id
    annot = annot.reindex(row_ids)

    gct.index = annot.pr_gene_symbol.values
    gct.index.name = 'Name'
    gct.to_csv(outFile, sep='\t')

    # restore the two header lines; presumably each line_pre_adder call
    # prepends to the file, so "#1.2" ends up on the first line - confirm
    n_rows, n_cols = gct.shape
    line_pre_adder(outFile, '\t'.join([str(n_rows), str(n_cols - 1)]))
    line_pre_adder(outFile, "#1.2")
##grab anything that appeared on a DOS plate
# CM = mu.CMapMongo()
# dosQuery = CM.find({'sig_id':{'$regex':'DOS'},'pert_type':'trt_cp'}, #, 
#         {'sig_id':True,'pert_id':True,'cell_id':True,'pert_time':True,'is_gold':True,'pert_iname':True,'distil_ss':True,'distil_cc_q75':True},
#         toDataFrame=True)
# potentialDos = set(dosQuery['pert_id'])

# #check the pert_collection status of each compounds
# mc = mu.MongoContainer()
# pertInfo = mc.pert_info.find({'pert_id':{'$in':list(potentialDos)}},
#             {},toDataFrame=True)
# collectionSets = set(pertInfo['pert_icollection'])


# Query pert_info for every record whose pert_icollection is 'DOS'.
# toDataFrame=True presumably returns a pandas DataFrame (it is indexed
# like one below) -- confirm against mu.MongoContainer.
mc = mu.MongoContainer()
pertInfo = mc.pert_info.find({'pert_icollection':'DOS'},
            {},toDataFrame=True)
# Split the DOS records on their pert_iname.
# NOTE(review): presumably an iname containing 'BRD' means the compound has
# no known common name -- confirm.
inameSer = pertInfo['pert_iname']
inameFrm = pd.DataFrame(inameSer)
# str.contains matches 'BRD' anywhere in the name, not only at the start.
# notBRDiname: records whose iname does not contain 'BRD';
# isBRDiname: records whose iname does.
notBRDiname = pertInfo[~inameSer.str.contains('BRD')]
isBRDiname = pertInfo[inameSer.str.contains('BRD')]
# pert_id column of the records whose iname contains 'BRD'
dosBrds = isBRDiname['pert_id']

########################
## all DOS signatures ##
########################

# get signatures for all DOS compounds