Beispiel #1
0
    def __init__(self, filename, verboseR=True):
        """Initialise the D8C2 challenge object for a given file.

        :param filename: kept as ``self.filename``.
        :param verboseR: forwarded unchanged to ``RTools.__init__``.
        """
        # Both initialisers are invoked explicitly on this instance,
        # Challenge first and RTools second.
        Challenge.__init__(self, challenge_name='D8C2')
        RTools.__init__(self, verboseR=verboseR)

        self.filename = filename

        # Directory holding this module file.
        module_file = os.path.abspath(__file__)
        self._path2data = os.path.dirname(module_file)
Beispiel #2
0
    def __init__(self, filename, verboseR=True):
        """Initialise the D8C2 challenge object for a given file.

        :param filename: kept as ``self.filename``.
        :param verboseR: forwarded unchanged to ``RTools.__init__``.
        """
        # Both initialisers are invoked explicitly on this instance,
        # Challenge first and RTools second.
        Challenge.__init__(self, challenge_name='D8C2')
        RTools.__init__(self, verboseR=verboseR)

        self.filename = filename
Beispiel #3
0
    def score(self, filename):
        self.G = self._read_challenge(self.download_goldstandard())
        self.T = self._read_challenge(filename)

        #from scipy.stats.stats import pearsonr, spearmanr
        G = self.G[self.G.columns[2:]].values
        T = self.T[self.T.columns[2:]].values

        # Using scipy, the pvalue are not the same as in matlab for several reasons.
        # first scipy returns only 2-tail pvalue but more importantly, it is
        # a rough approximation as mentionned in their doc and when compared
        # to matlab differences can be large. So, we use R, which results are
        # also differnt but much close (1-2% different
        """data = [spearmanr(G[i,:], T[i,:]) for i in range(0,50)]
        rho_row = [x[0] for x in data]
        pval_row = [x[1] for x in data]
        # row correlation
        data = [spearmanr(G[:,i], T[:,i]) for i in range(0,8)]
        rho_col = [x[0] for x in data]
        pval_col = [x[1] for x in data]
        """

        from dreamtools.core.rtools import RTools
        rtool = RTools(verboseR=False)
        pval_row = []
        pval_col = []
        rho_row = []
        rho_col = []

        for i in range(0, 50):
            rtool.session.t = T[i, :].copy()
            rtool.session.g = G[i, :].copy()
            rtool.session.run(
                "results = cor.test(t, g, method='spearman', alternative='greater', exact=F)"
            )
            rho_row.append(rtool.session.results['estimate'])
            pval_row.append(rtool.session.results['p.value'])
        for i in range(0, 8):
            rtool.session.t = T[:, i].copy()
            rtool.session.g = G[:, i].copy()
            rtool.session.run(
                "results = cor.test(t, g, method='spearman', alternative='greater', exact=F)"
            )
            rho_col.append(rtool.session.results['estimate'])
            pval_col.append(rtool.session.results['p.value'])

        print("""
WARNING: the spearman correlation pvalue are computed using R. Pvalues are
slightly different from those computed using matlab and therefore the final
values may differ by a few percents to the pvlues reported in the original
challenge. \n""")

        self.rho_col = rho_col
        self.pval_col = pval_col
        self.rho_row = rho_row
        self.pval_row = pval_row

        score1 = np.exp(np.nansum(np.log(pval_row)) / 50)
        score2 = np.exp(np.nansum(np.log(pval_col)) / 8.)

        score = sum(-np.log10([score1, score2])) / 2.
        return {'score': score}
Beispiel #4
0
    def __init__(self, filename, verboseR=True):
        """Initialise the D8C2 challenge object for a given file.

        :param filename: kept as ``self.filename``.
        :param verboseR: forwarded unchanged to ``RTools.__init__``.
        """
        # Both initialisers are invoked explicitly on this instance,
        # Challenge first and RTools second.
        Challenge.__init__(self, challenge_name='D8C2')
        RTools.__init__(self, verboseR=verboseR)

        self.filename = filename
Beispiel #5
0
    def score_challengeB(self, filenames):
        """Score the three sub-challenge B submissions.

        For every (submission, gold standard) pair, a one-sided Spearman
        correlation test (R ``cor.test`` with ``alternative='greater'``) is
        run on the first and on the second data row, reported as phenotype 1
        and phenotype 2. The score of a pair is ``-(log(p1) + log(p2))``.

        An empirical p-value is then estimated for each score: the predicted
        values are re-ordered with random permutations of the columns
        ``RIL1`` .. ``RIL30``, re-scored ``self.N_pvalues`` times, and the
        p-value is the fraction of random scores that reach the real score.

        Side effects: fills ``self.golds``, ``self.preds``, ``self.corp1``,
        ``self.corp2``, ``self.pval1``, ``self.pval2``, ``self.scores``,
        ``self.random_scores`` and ``self.pvals``.

        :param filenames: exactly 3 submission files, ordered as B1, B2, B3.
            Submissions and gold standards are assumed to be sorted in the
            same way.
        :return: DataFrame with one column per sub-challenge (``SysGenB1``,
            ``SysGenB2``, ``SysGenB3``) and one row per metric.
        :raises AssertionError: if ``filenames`` does not hold 3 entries.
        """
        # Validate first so that no R session is started for invalid input.
        assert len(filenames) == 3, "Must provide 3 files"

        from dreamtools.core.rtools import RTools
        from easydev import Progress
        rtool = RTools(verboseR=False)

        r_command = "results = cor.test(t, g, method='spearman', alternative='greater')"

        def spearman_test(pred_values, gold_values):
            # Run the one-sided Spearman test in R and return a copy of the
            # results so that the next call cannot overwrite them.
            rtool.session.t = pred_values
            rtool.session.g = gold_values
            rtool.session.run(r_command)
            return rtool.session.results.copy()

        cor_pheno1 = []
        cor_pheno2 = []
        pval_pheno1 = []
        pval_pheno2 = []
        scores = []

        self.golds = []
        self.preds = []
        gold_filenames = self.download_goldstandard('B')
        print("Warning: your 3 submissions should be ordered as B1, B2, B3 files")

        for idx, filename in enumerate(filenames):
            # Files are split on a single space or tab.
            gold = pd.read_csv(gold_filenames[idx], sep='[ \t]',
                    engine='python')
            self.golds.append(gold)

            pred1 = pd.read_csv(filename, sep='[ \t]', engine='python')
            self.preds.append(pred1)

            # Correlation of gold standard versus prediction. ``.iloc`` is
            # used because ``.ix`` no longer exists in pandas >= 1.0.
            T1 = spearman_test(pred1.iloc[0].values, gold.iloc[0].values)
            T2 = spearman_test(pred1.iloc[1].values, gold.iloc[1].values)

            cor_pheno1.append(T1['estimate'])
            cor_pheno2.append(T2['estimate'])
            pval_pheno1.append(T1['p.value'])
            pval_pheno2.append(T2['p.value'])

            scores.append(-(np.log(T1['p.value']) + np.log(T2['p.value'])))

        self.corp1 = cor_pheno1
        self.corp2 = cor_pheno2
        self.pval1 = pval_pheno1
        self.pval2 = pval_pheno2
        self.scores = scores

        # Empirical p-values from randomly permuted predictions.
        random_scores = {0: [], 1: [], 2: []}
        ril_labels = ['RIL%s' % i for i in range(1, 31)]
        n_random = self.N_pvalues
        pb = Progress(n_random, interval=1)

        # Draw exactly ``n_random`` permutations so that the number of
        # random scores matches the denominator used for the p-values.
        for ii in range(n_random):
            for tag in (0, 1, 2):
                pred = self.preds[tag]
                gold = self.golds[tag]

                # Two independent random orderings of the RIL columns.
                coord = random.sample(ril_labels, len(ril_labels))
                coord2 = random.sample(ril_labels, len(ril_labels))

                T1 = spearman_test(pred.iloc[0].loc[coord].values,
                                   gold.iloc[0].values)
                T2 = spearman_test(pred.iloc[1].loc[coord2].values,
                                   gold.iloc[1].values)

                random_scores[tag].append(
                    -(np.log(T1['p.value']) + np.log(T2['p.value'])))
            pb.animate(ii + 1)
        self.random_scores = random_scores

        # Fraction of random scores at least as large as the real score.
        # The lists are converted explicitly so that the comparison is
        # element-wise whatever the type of the real score.
        pvals = [
            np.sum(np.asarray(random_scores[k]) >= self.scores[k])
            / float(n_random)
            for k in (0, 1, 2)]
        self.pvals = pvals

        df = pd.DataFrame({'scores':self.scores,
            'correlation_phenotype1':cor_pheno1,
            'correlation_phenotype2':cor_pheno2,
            'pvalues_phenotype1':pval_pheno1,
            'pvalues_phenotype2':pval_pheno2,
            'pvalues':self.pvals})
        df = df.T
        df.columns = ['SysGenB1', 'SysGenB2', 'SysGenB3']
        return df