Example #1
0
    def set_clique_n69(self):
        '''
        Use the 69 PCLs curated by Rajiv as the class labels.

        Sets
        ----
        self.pclDict : mapping returned by label_loader.load_clique_set_n69()
        self.all_group_cps : the values of that mapping
            (presumably the compounds of each group - confirm against loader)
        self.test_groups : the keys of that mapping (the group names)
        '''
        # the label loader is the source of the curated clique definitions
        label_source = ldc.label_loader()
        self.pclDict = label_source.load_clique_set_n69()
        clique_map = self.pclDict
        self.all_group_cps = clique_map.values()
        self.test_groups = clique_map.keys()
    def set_clique_n69(self):
        '''
        Use the 69 PCLs curated by Rajiv as the class labels.

        Sets
        ----
        self.pclDict : mapping returned by label_loader.load_clique_set_n69()
        self.all_group_cps : the values of that mapping
            (presumably the compounds of each group - confirm against loader)
        self.test_groups : the keys of that mapping (the group names)
        '''
        # the label loader is the source of the curated clique definitions
        label_source = ldc.label_loader()
        self.pclDict = label_source.load_clique_set_n69()
        clique_map = self.pclDict
        self.all_group_cps = clique_map.values()
        self.test_groups = clique_map.keys()
Example #3
0
    def test_classes_incrementally(self, rnkpt_med_file, n_test_max=False):
        '''
        -start from the most internally consistent PCL - and move down the list
        -incrementally increase the number of groups added to the classifier

        Groups are taken in order of descending 'median_rankpt'. The classifier
        is run on the top 2 groups, then the top 3, and so on, and the accuracy
        of every run is recorded.

        Parameters
        ----------
        rnkpt_med_file : str
            path to a tab-delimited file containing the median summly rankpoint
            values for each group (output from the pcla tool). The columns
            'PCL_group' and 'median_rankpt' are read.
        n_test_max : int
            -max number of PCL groups to incorporate into the classifier
            -if set to False, all groups are tested

        Sets
        ----
        self.pclDict : group -> compounds, restricted to the groups listed in
            the rankpoint file, with each compound kept only in the
            highest-ranked group it appears in
        self.test_groups : the groups used in the last run
        self.n_group_accuracy : dict, number of groups ->
            self.model_accuracy_across_cells as left by that run
        '''
        ### load in data for individual groups
        llo = ldc.label_loader()
        self.pclDict = llo.load_TTD()
        # load pcl rankpoint file
        groupMedians = pd.io.parsers.read_csv(rnkpt_med_file, sep='\t')
        # DataFrame.sort was removed in pandas 0.20; use sort_values where it
        # exists and fall back to sort on older installations
        if hasattr(groupMedians, 'sort_values'):
            groupMedians = groupMedians.sort_values('median_rankpt',
                                                    ascending=False)
        else:
            groupMedians = groupMedians.sort('median_rankpt', ascending=False)
        # make sure compounds are not counted more than once in the dictionary.
        # A new list is built for every group: calling remove() on a list
        # while iterating over it skips the element that follows each removal,
        # which let some shared compounds through.
        seenCompounds = set()
        reducedPCLDict = {}
        for key in groupMedians['PCL_group']:
            uniqueCps = [brd for brd in self.pclDict[key]
                         if brd not in seenCompounds]
            reducedPCLDict[key] = uniqueCps
            seenCompounds.update(uniqueCps)
        self.pclDict = reducedPCLDict
        # set increment of groups
        if n_test_max:
            max_groups = n_test_max
        else:
            max_groups = groupMedians.shape[0]
        group_range = np.arange(2, max_groups + 1)
        n_group_accuracy = {}
        for n_groups in group_range:
            # single parenthesised argument: prints the same on python 2 and 3
            print("testing " + str(n_groups) + " number of classes")
            testGroups = groupMedians['PCL_group'][:n_groups].values
            self.test_groups = testGroups
            self.classification_across_cell(groups_to_model=testGroups,
                                            loo_type='by_cp',
                                            max_signatures_per_cp=3)
            n_group_accuracy[n_groups] = self.model_accuracy_across_cells
        self.n_group_accuracy = n_group_accuracy
    def test_classes_incrementally(self,rnkpt_med_file,n_test_max=False):
        '''
        -start from the most internally consistent PCL - and move down the list
        -incrementally increase the number of groups added to the classifier

        Groups are taken in order of descending 'median_rankpt'. The classifier
        is run on the top 2 groups, then the top 3, and so on, and the accuracy
        of every run is recorded.

        Parameters
        ----------
        rnkpt_med_file : str
            path to a tab-delimited file containing the median summly rankpoint
            values for each group (output from the pcla tool). The columns
            'PCL_group' and 'median_rankpt' are read.
        n_test_max : int
            -max number of PCL groups to incorporate into the classifier
            -if set to False, all groups are tested

        Sets
        ----
        self.pclDict : group -> compounds, restricted to the groups listed in
            the rankpoint file, with each compound kept only in the
            highest-ranked group it appears in
        self.test_groups : the groups used in the last run
        self.n_group_accuracy : dict, number of groups ->
            self.model_accuracy_across_cells as left by that run
        '''
        ### load in data for individual groups
        llo = ldc.label_loader()
        self.pclDict = llo.load_TTD()
        # load pcl rankpoint file
        groupMedians = pd.io.parsers.read_csv(rnkpt_med_file,sep='\t')
        # DataFrame.sort was removed in pandas 0.20; use sort_values where it
        # exists and fall back to sort on older installations
        if hasattr(groupMedians, 'sort_values'):
            groupMedians = groupMedians.sort_values('median_rankpt',ascending=False)
        else:
            groupMedians = groupMedians.sort('median_rankpt',ascending=False)
        # make sure compounds are not counted more than once in the dictionary.
        # A new list is built for every group: calling remove() on a list
        # while iterating over it skips the element that follows each removal,
        # which let some shared compounds through.
        seenCompounds = set()
        reducedPCLDict = {}
        for key in groupMedians['PCL_group']:
            uniqueCps = [brd for brd in self.pclDict[key] if brd not in seenCompounds]
            reducedPCLDict[key] = uniqueCps
            seenCompounds.update(uniqueCps)
        self.pclDict = reducedPCLDict
        # set increment of groups
        if n_test_max:
            max_groups = n_test_max
        else:
            max_groups = groupMedians.shape[0]
        group_range = np.arange(2,max_groups+1)
        n_group_accuracy = {}
        for n_groups in group_range:
            # single parenthesised argument: prints the same on python 2 and 3
            print("testing " + str(n_groups) + " number of classes")
            testGroups = groupMedians['PCL_group'][:n_groups].values
            self.test_groups = testGroups
            self.classification_across_cell(groups_to_model=testGroups,loo_type='by_cp',max_signatures_per_cp=3)
            n_group_accuracy[n_groups] = self.model_accuracy_across_cells
        self.n_group_accuracy = n_group_accuracy
 def set_classes(self):
     '''
     Choose the class labels from a fixed list of five TTD groups.

     Sets
     ----
     self.pclDict : mapping returned by label_loader.load_TTD()
     self.all_group_cps : list, the entries of self.pclDict for the five
         groups concatenated in the order the groups are listed
     self.test_groups : list of the five group names
     '''
     label_source = ldc.label_loader()
     self.pclDict = label_source.load_TTD()
     # five groups, picked by the original author as the best inter-connectors
     chosen = [
         'Histone_deacetylase_1-Inhibitor',
         'Glucocorticoid_receptor-Agonist',
         'Proto-oncogene_tyrosine-protein_kinase_ABL1-Inhibitor',
         'Phosphatidylinositol-4,5-bisphosphate_3-kinase_catalytic_subunit,_delta_isoform-Inhibitor',
         '3-hydroxy-3-methylglutaryl-coenzyme_A_reductase-Inhibitor',
     ]
     # flatten the per-group member lists into one list, group by group
     self.all_group_cps = [cp for grp in chosen for cp in self.pclDict[grp]]
     self.test_groups = chosen
Example #6
0
 def set_classes(self):
     '''
     Choose the class labels from a fixed list of five TTD groups.

     Sets
     ----
     self.pclDict : mapping returned by label_loader.load_TTD()
     self.all_group_cps : list, the entries of self.pclDict for the five
         groups concatenated in the order the groups are listed
     self.test_groups : list of the five group names
     '''
     label_source = ldc.label_loader()
     self.pclDict = label_source.load_TTD()
     # five groups, picked by the original author as the best inter-connectors
     chosen = ['Histone_deacetylase_1-Inhibitor',
               'Glucocorticoid_receptor-Agonist',
               'Proto-oncogene_tyrosine-protein_kinase_ABL1-Inhibitor',
               'Phosphatidylinositol-4,5-bisphosphate_3-kinase_catalytic_subunit,_delta_isoform-Inhibitor',
               '3-hydroxy-3-methylglutaryl-coenzyme_A_reductase-Inhibitor']
     # flatten the per-group member lists into one list, group by group
     self.all_group_cps = [cp for grp in chosen for cp in self.pclDict[grp]]
     self.test_groups = chosen
Example #7
0
## 
# gmo = gm.GeneMod()
# gmo.load_data_from_gctx(src=cmap.score_path,symbols='TRIB1')
# gmo.z_score_filter(4)
# gmo.signature_strength_filter(8)
# gmo.cell_type_filter('HEPG2')
# # gmo.sc_plot(out=wkdir+'sc_hits.png')
# gmo.scatter()
# gmo.expression_histogram()

# gmo.ssr_plot(out=wkdir+'ssr_hits.png')
# cid = [gmo.cid[x] for x in gmo.reg_ind]

# Load the drugbank annotations and build a compound -> target gene lookup.
# reload() picks up any edits made to the ldc module since it was imported.
reload(ldc)
llo = ldc.label_loader()
pclDict = llo.load_TTD()
# gene -> compounds mapping (iterated below as dbDict[gene] -> compounds)
dbDict = llo.load_drugbank_by_gene(group_by_action=False)
geneTargets = dbDict.keys()
# flatten the drug-gene relationships into (compound, gene) pairs
tupList = []
for gene in dbDict:
    # one tuple per compound listed under this gene
    cps = dbDict[gene]
    for cp in cps:
        tup = (cp, gene)
        tupList.append(tup)
# two-column table: one row per (compound, gene) pair
dbFrm = pd.DataFrame(tupList,columns=['brd','target_gene'])
# re-index the gene column by compound so it can be turned into a dict
dbSer = pd.Series(dbFrm['target_gene'])
dbSer.index = dbFrm['brd']
# NOTE(review): dict keys are unique, so a compound listed under several
# genes keeps only one of them here - confirm this is intended
cpToGenDict = dbSer.to_dict()
Example #8
0
## pick 5 groups - best inter-connectors
# testGroups = ['Histone_deacetylase_1-Inhibitor',
#               'Glucocorticoid_receptor-Agonist',
#               'Proto-oncogene_tyrosine-protein_kinase_ABL1-Inhibitor',
#               'Phosphatidylinositol-4,5-bisphosphate_3-kinase_catalytic_subunit,_delta_isoform-Inhibitor',
#               '3-hydroxy-3-methylglutaryl-coenzyme_A_reductase-Inhibitor']
# load in top groups

# Scratch script: build the svm_pcla object, load the TTD labels and the
# per-group rankpoint medians, then strip repeated compounds from the groups.
wkdir = '/xchip/cogs/hogstrom/analysis/scratch'
if not os.path.exists(wkdir):
    os.mkdir(wkdir)
# make pso object
pso = psc.svm_pcla(out=wkdir)
# presumably aliased so code written against `self` runs at module level
self = pso
llo = ldc.label_loader()
self.pclDict = llo.load_TTD()
# load pcl rankpoint file
rnkpt_med_file = '/xchip/cogs/projects/pharm_class/TTd_Oct29/PCL_group_rankpt_medians.txt'
groupMedians = pd.io.parsers.read_csv(rnkpt_med_file, sep='\t')
# DataFrame.sort was removed in pandas 0.20; use sort_values where it exists
# and fall back to sort on older installations
if hasattr(groupMedians, 'sort_values'):
    groupMedians = groupMedians.sort_values('median_rankpt', ascending=False)
else:
    groupMedians = groupMedians.sort('median_rankpt', ascending=False)
# make sure compounds are not counted more than once in the dictionary:
# a compound stays only in the highest-ranked group it appears in
extendedCompoundList = []
seenCompounds = set()  # same members as extendedCompoundList, O(1) lookup
reducedPCLDict = {}
for key in groupMedians['PCL_group']:
    value = self.pclDict[key]
    # Filter in one pass and write back with slice assignment. Calling
    # remove() while iterating the same list skips the element after each
    # removal, so some shared compounds used to survive. Slice assignment
    # keeps the list object itself, so self.pclDict[key] is still updated.
    value[:] = [brd for brd in value if brd not in seenCompounds]
    reducedPCLDict[key] = value
    extendedCompoundList.extend(value)
    seenCompounds.update(value)