예제 #1
0
    def compute_statistics(self):
        """Return the final metrics of the user prediction.

        For every transcription factor index from 1 to ``self.Ntf``:

        * the tab-separated, header-less file returned by
          ``self._setfile(tf_index, "Data")`` is read and the Pearson,
          Spearman and log10-Pearson correlations between its first two
          columns are stored;
        * AUROC and AUPR are computed with ``ROCDiscovery`` from
          ``self._dvs[tf_index]`` (stored under the ``*_8mer`` keys) and
          from ``self._dvps[tf_index]`` (stored under the ``*_probe`` keys).

        :return: a dataframe with various metrics for each transcription
            factor (one row per transcription factor, columns in the order
            Pearson, Spearman, Pearson_Log, AUROC_8mer, AUPR_8mer,
            AUROC_probe, AUPR_probe).

        Must call :meth:`score` before.

        """
        column_order = [
            'Pearson', 'Spearman', 'Pearson_Log', 'AUROC_8mer',
            'AUPR_8mer', 'AUROC_probe', 'AUPR_probe'
        ]
        data = {name: [] for name in column_order}

        pb = progress_bar(self.Ntf, interval=1)
        for tf_index in range(1, self.Ntf + 1):
            dfdata = pd.read_csv(self._setfile(tf_index, "Data"),
                                 sep='\t',
                                 header=None)
            # ``DataFrame.ix`` no longer exists in recent pandas. The file is
            # read with header=None, so the correlation matrix is labelled
            # 0..n-1 and the positional ``iloc[0, 1]`` selects the same cell.
            pearson = dfdata.corr('pearson').iloc[0, 1]
            spearman = dfdata.corr('spearman').iloc[0, 1]
            pearsonLog = np.log10(dfdata).corr('pearson').iloc[0, 1]

            data['Pearson'].append(pearson)
            data['Pearson_Log'].append(pearsonLog)
            data['Spearman'].append(spearman)

            # Same ROC / precision-recall computation for both sources.
            for suffix, source in (('8mer', self._dvs), ('probe', self._dvps)):
                dvdata = source[tf_index]
                r = ROCDiscovery(dvdata.values)
                rocdata = r.get_statistics()
                data['AUROC_' + suffix].append(r.compute_auc(roc=rocdata))
                data['AUPR_' + suffix].append(r.compute_aupr(roc=rocdata))

            pb.animate(tf_index)

        # Re-index explicitly so the column order never depends on dict order.
        df = pd.DataFrame(data)
        df = df[column_order]

        return df
예제 #2
0
    def compute_statistics(self):
        """Return the final metrics of the user prediction.

        Loops over the transcription factor indices 1..``self.Ntf``. For each
        one, the tab-separated file given by ``self._setfile(tf_index, "Data")``
        is loaded (no header) and three correlations between its first two
        columns are recorded: Pearson, Spearman, and Pearson on the log10
        values. AUROC and AUPR are then obtained from ``ROCDiscovery`` for
        ``self._dvs[tf_index]`` (``*_8mer`` columns) and
        ``self._dvps[tf_index]`` (``*_probe`` columns).

        :return: a dataframe with various metrics for each transcription factor.

        Must call :meth:`score` before.

        """
        columns = ['Pearson', 'Spearman', 'Pearson_Log', 'AUROC_8mer',
                   'AUPR_8mer', 'AUROC_probe', 'AUPR_probe']
        data = {column: [] for column in columns}

        pb = progress_bar(self.Ntf, interval=1)
        for tf_index in range(1, self.Ntf + 1):
            dfdata = pd.read_csv(self._setfile(tf_index, "Data"), sep='\t', header=None)

            # ``.ix`` was removed from pandas; the correlation matrix of a
            # header-less frame is labelled 0..n-1, so ``iloc[0, 1]`` reads
            # the very same cell (first column vs second column).
            data['Pearson'].append(dfdata.corr('pearson').iloc[0, 1])
            data['Pearson_Log'].append(np.log10(dfdata).corr('pearson').iloc[0, 1])
            data['Spearman'].append(dfdata.corr('spearman').iloc[0, 1])

            # 8-mer values first, then probe values: identical processing.
            for kind, container in (('8mer', self._dvs), ('probe', self._dvps)):
                r = ROCDiscovery(container[tf_index].values)
                rocdata = r.get_statistics()
                auroc = r.compute_auc(roc=rocdata)
                aupr = r.compute_aupr(roc=rocdata)
                data['AUROC_' + kind].append(auroc)
                data['AUPR_' + kind].append(aupr)

            pb.animate(tf_index)

        # Select the columns explicitly to guarantee their order.
        df = pd.DataFrame(data)
        df = df[columns]

        return df
예제 #3
0
    def score(self, filename):
        """Score a prediction file against the gold standard.

        Both the prediction file and the gold standard file (obtained from
        ``self.download_goldstandard()``) are read as header-less two-column
        tables whose fields are separated by a single space or tab. They are
        inner-joined on the sequence column and sorted by decreasing
        predicted value. The gold standard values, in that order, are passed
        to ``ROCDiscovery`` to compute AUROC and AUPR. Each metric is
        converted to a p-value with ``self._probability`` and the overall
        score is the mean of ``-log10`` of the two p-values.

        The merged, sorted table is kept in ``self.data`` and the
        ``ROCDiscovery`` instance in ``self.roc``.

        :param filename: path to the prediction file.
        :return: dictionary with AUC/AUPR metrics and score (keys ``auroc``,
            ``aupr``, ``pval_aupr``, ``pval_auroc`` and ``score``).

        """
        # Presumably populates self.auroc / self.aupr, which are read below
        # -- confirm against _load_proba.
        self._load_proba()
        prediction = pd.read_csv(filename, sep='[ \t]', engine='python', header=None)
        gold = pd.read_csv(self.download_goldstandard(), sep='[ \t]',
                engine='python', header=None)
        prediction.columns = ['sequence', 'value']
        gold.columns = ['sequence', 'value']

        # merge the prediction and gold based on the sequence.
        data = pd.merge(prediction, gold, how='inner', on=['sequence'],
                suffixes=['_pred', '_gold'])

        # sort by prediction, highest first
        try:
            data.sort_values(by=['value_pred'], ascending=False, inplace=True)
        except AttributeError:
            # Old pandas releases have no DataFrame.sort_values; only that
            # case may fall back to the legacy API. Any other error is real
            # and must propagate instead of being hidden by a bare except.
            data.sort(columns=['value_pred'], ascending=False, inplace=True)

        # NOTE: after the merge the columns are (sequence, value_pred,
        # value_gold), so the column named 'prediction' below actually holds
        # the gold standard values.
        data.columns = ['Sequence', 'prediction_values', 'prediction']

        self.data = data

        from dreamtools.core.rocs import ROCDiscovery
        self.roc = ROCDiscovery(self.data['prediction'])
        self.roc.get_statistics()
        auroc = self.roc.compute_auc()
        aupr = self.roc.compute_aupr()

        P_AUPR = self._probability(self.aupr['X'][0], self.aupr['Y'][0], aupr)
        P_AUROC = self._probability(self.auroc['X'][0], self.auroc['Y'][0], auroc)

        # overall score: mean of -log10 of the two p-values
        score = np.mean(-np.log10([P_AUROC, P_AUPR]))

        return {'auroc':auroc, 'aupr':aupr, 'pval_aupr': P_AUPR,
                'pval_auroc':P_AUROC, 'score':score}
예제 #4
0
    def score(self, filename):
        """Compute AUROC/AUPR, their p-values and the overall score.

        Steps performed:

        1. read the prediction file and the gold standard file (path given
           by ``self.download_goldstandard()``) as header-less two-column
           tables, fields separated by a single space or tab;
        2. inner-join them on the sequence column and sort by decreasing
           predicted value;
        3. feed the gold standard values, in that order, to ``ROCDiscovery``
           and compute AUROC and AUPR;
        4. turn each metric into a p-value with ``self._probability`` and
           average ``-log10`` of the two p-values.

        Side effects: ``self.data`` receives the merged table and
        ``self.roc`` the ``ROCDiscovery`` instance.

        :param filename: path to the prediction file.
        :return: dictionary with AUC/AUPR metrics and score (keys ``auroc``,
            ``aupr``, ``pval_aupr``, ``pval_auroc`` and ``score``).

        """
        # self.aupr and self.auroc are read below; presumably they are set
        # by this call -- verify against _load_proba.
        self._load_proba()

        read_options = dict(sep='[ \t]', engine='python', header=None)
        prediction = pd.read_csv(filename, **read_options)
        gold = pd.read_csv(self.download_goldstandard(), **read_options)
        prediction.columns = ['sequence', 'value']
        gold.columns = ['sequence', 'value']

        # merge the prediction and gold based on the sequence.
        data = pd.merge(prediction, gold, how='inner', on=['sequence'],
                suffixes=['_pred', '_gold'])

        # sort by prediction (descending)
        try:
            data.sort_values(by=['value_pred'], ascending=False, inplace=True)
        except AttributeError:
            # Only a missing ``sort_values`` (old pandas) justifies falling
            # back to the legacy ``sort``; a bare except would also mask
            # genuine failures and keyboard interrupts.
            data.sort(columns=['value_pred'], ascending=False, inplace=True)

        # NOTE: columns are (sequence, value_pred, value_gold) at this point,
        # hence 'prediction' ends up naming the gold standard values.
        data.columns = ['Sequence', 'prediction_values', 'prediction']

        self.data = data

        from dreamtools.core.rocs import ROCDiscovery
        self.roc = ROCDiscovery(self.data['prediction'])
        self.roc.get_statistics()
        auroc = self.roc.compute_auc()
        aupr = self.roc.compute_aupr()

        P_AUPR = self._probability(self.aupr['X'][0], self.aupr['Y'][0], aupr)
        P_AUROC = self._probability(self.auroc['X'][0], self.auroc['Y'][0], auroc)

        # overall score: mean of -log10 of the two p-values
        score = np.mean(-np.log10([P_AUROC, P_AUPR]))

        return {'auroc':auroc, 'aupr':aupr, 'pval_aupr': P_AUPR,
                'pval_auroc':P_AUROC, 'score':score}
예제 #5
0
class D5C1(Challenge):
    """A class dedicated to D5C1 challenge


    ::

        from dreamtools import D5C1
        s = D5C1()
        filename = s.download_template() 
        s.score(filename) 

    Data and templates are downloaded from Synapse. You must have a login.

    """
    def __init__(self):
        """.. rubric:: constructor

        Registers the challenge under the name ``'D5C1'``, records the
        directory containing this module in ``_path2data`` and requests the
        data files needed for scoring.

        """
        super(D5C1, self).__init__('D5C1')
        self._path2data = os.path.split(os.path.abspath(__file__))[0]
        self._init()
        self.sub_challenges = []

    def _init(self):
        """Request the four data files of the challenge by Synapse id."""
        # should download files from synapse if required.
        self._download_data('AUPR.mat', 'syn4560154')
        self._download_data('AUROC.mat', 'syn4560158')
        self._download_data('DREAM5_EAR_GoldStandard.tsv', 'syn4560182')
        self._download_data('DREAM5_EAR_myteam_Predictions.txt', 'syn4560167')

    def download_template(self):
        """Return the full path to the template prediction file."""
        return self.get_pathname('DREAM5_EAR_myteam_Predictions.txt')

    def download_goldstandard(self):
        """Return the full path to the gold standard file."""
        return self.get_pathname('DREAM5_EAR_GoldStandard.tsv')

    def _load_proba(self):
        """Load ``AUROC.mat`` and ``AUPR.mat`` into ``self.auroc`` / ``self.aupr``.

        :meth:`score` reads the ``'X'`` and ``'Y'`` entries of both files.
        """
        import scipy.io
        self.auroc = scipy.io.loadmat(self.get_pathname("AUROC.mat"))
        self.aupr = scipy.io.loadmat(self.get_pathname("AUPR.mat"))

    def score(self, filename):
        """Score a prediction file against the gold standard.

        The prediction and gold standard files are read as header-less
        two-column tables (fields separated by a single space or tab),
        inner-joined on the sequence column and sorted by decreasing
        predicted value. The gold standard values, in that order, are given
        to ``ROCDiscovery`` to compute AUROC and AUPR; each is converted to
        a p-value with :meth:`_probability`.

        The merged table is stored in ``self.data`` and the
        ``ROCDiscovery`` instance in ``self.roc``.

        :param filename: path to the prediction file.
        :return: dictionary with keys ``auroc``, ``aupr``, ``pval_aupr``,
            ``pval_auroc`` and ``score`` (mean of ``-log10`` of the two
            p-values).
        """
        self._load_proba()
        prediction = pd.read_csv(filename, sep='[ \t]', engine='python', header=None)
        gold = pd.read_csv(self.download_goldstandard(), sep='[ \t]', 
                engine='python', header=None)
        prediction.columns = ['sequence', 'value']
        gold.columns = ['sequence', 'value']

        # merge the prediction and gold based on the sequence.
        data = pd.merge(prediction, gold, how='inner', on=['sequence'], 
                suffixes=['_pred', '_gold'])

        # sort by prediction. DataFrame.sort was removed from pandas, so use
        # sort_values and keep the legacy call only for old installations.
        try:
            data.sort_values(by=['value_pred'], ascending=False, inplace=True)
        except AttributeError:
            data.sort(columns=['value_pred'], ascending=False, inplace=True)

        # NOTE: columns are (sequence, value_pred, value_gold) after the
        # merge, so 'prediction' below names the gold standard values.
        data.columns = ['Sequence', 'prediction_values', 'prediction']

        self.data = data

        from dreamtools.core.rocs import ROCDiscovery
        self.roc = ROCDiscovery(self.data['prediction'])
        self.roc.get_statistics()
        auroc = self.roc.compute_auc()
        aupr = self.roc.compute_aupr()

        P_AUPR = self._probability(self.aupr['X'][0], self.aupr['Y'][0], aupr)
        P_AUROC = self._probability(self.auroc['X'][0], self.auroc['Y'][0], auroc)

        # overall dream score; the original MATLAB implementation reads:
        #   P = [ p_auroc p_aupr ];
        #   overall_score = mean(-log10(P)')';
        score = np.mean(-np.log10([P_AUROC, P_AUPR]))

        return {'auroc':auroc, 'aupr':aupr, 'pval_aupr': P_AUPR, 'pval_auroc':P_AUROC,
                'score':score}

    def _probability(self, X, Y, x):
        """Return the sum of ``Y * dx`` over the entries where ``X >= x``.

        ``dx`` is taken as ``X[2] - X[1]``, so ``X`` needs at least three
        entries. Presumably ``X`` is a uniformly spaced grid and ``Y`` a
        density sampled on it, which would make the result an upper-tail
        probability -- confirm against the content of the .mat files.

        :param X: array of grid positions.
        :param Y: array of values, indexable by a boolean mask built on X.
        :param x: threshold (the observed metric).
        """
        dx = X[2] - X[1]
        return  sum( Y[X>=x] * dx )
예제 #6
0
class D5C1(Challenge):
    """A class dedicated to D5C1 challenge

    ::

        from dreamtools import D5C1
        s = D5C1()
        filename = s.download_template()
        s.score(filename)

    """
    def __init__(self, verbose=True, download=True, **kargs):
        """.. rubric:: constructor

        :param verbose: forwarded to the ``Challenge`` constructor.
        :param download: forwarded to the ``Challenge`` constructor.
        :param kargs: accepted but not used by this constructor.
        """
        super(D5C1, self).__init__('D5C1', verbose, download)
        self._init()
        self.sub_challenges = []

    def _init(self):
        """Request the four data files of the challenge by Synapse id.

        Nothing is requested when ``self._standalone`` is True.
        """
        if self._standalone is True:
            return

        # should download files from synapse if required.
        self._download_data('AUPR.mat', 'syn4560154')
        self._download_data('AUROC.mat', 'syn4560158')
        self._download_data('DREAM5_EAR_GoldStandard.tsv', 'syn4560182')
        self._download_data('DREAM5_EAR_myteam_Predictions.txt', 'syn4560167')

    def download_template(self):
        """Return the full path to the template prediction file."""
        return self.get_pathname('DREAM5_EAR_myteam_Predictions.txt')

    def download_goldstandard(self):
        """Return the full path to the gold standard file."""
        return self.get_pathname('DREAM5_EAR_GoldStandard.tsv')

    def _load_proba(self):
        """Load ``AUROC.mat`` and ``AUPR.mat`` into ``self.auroc`` / ``self.aupr``.

        :meth:`score` reads the ``'X'`` and ``'Y'`` entries of both files.
        """
        import scipy.io
        self.auroc = scipy.io.loadmat(self.get_pathname("AUROC.mat"))
        self.aupr = scipy.io.loadmat(self.get_pathname("AUPR.mat"))

    def score(self, filename):
        """Score a prediction file against the gold standard.

        The prediction and gold standard files are read as header-less
        two-column tables (fields separated by a single space or tab),
        inner-joined on the sequence column and sorted by decreasing
        predicted value. The gold standard values, in that order, are given
        to ``ROCDiscovery`` to compute AUROC and AUPR; each is converted to
        a p-value with :meth:`_probability`.

        The merged table is stored in ``self.data`` and the
        ``ROCDiscovery`` instance in ``self.roc``.

        :param filename: path to the prediction file.
        :return: dictionary with AUC/AUPR metrics and score (keys ``auroc``,
            ``aupr``, ``pval_aupr``, ``pval_auroc`` and ``score``, the
            latter being the mean of ``-log10`` of the two p-values).

        """
        self._load_proba()
        prediction = pd.read_csv(filename, sep='[ \t]', engine='python', header=None)
        gold = pd.read_csv(self.download_goldstandard(), sep='[ \t]',
                engine='python', header=None)
        prediction.columns = ['sequence', 'value']
        gold.columns = ['sequence', 'value']

        # merge the prediction and gold based on the sequence.
        data = pd.merge(prediction, gold, how='inner', on=['sequence'],
                suffixes=['_pred', '_gold'])

        # sort by prediction, highest first
        try:
            data.sort_values(by=['value_pred'], ascending=False, inplace=True)
        except AttributeError:
            # Only a missing ``sort_values`` (old pandas) should trigger the
            # legacy call; other errors must not be silently swallowed.
            data.sort(columns=['value_pred'], ascending=False, inplace=True)

        # NOTE: columns are (sequence, value_pred, value_gold) after the
        # merge, so 'prediction' below names the gold standard values.
        data.columns = ['Sequence', 'prediction_values', 'prediction']

        self.data = data

        from dreamtools.core.rocs import ROCDiscovery
        self.roc = ROCDiscovery(self.data['prediction'])
        self.roc.get_statistics()
        auroc = self.roc.compute_auc()
        aupr = self.roc.compute_aupr()

        P_AUPR = self._probability(self.aupr['X'][0], self.aupr['Y'][0], aupr)
        P_AUROC = self._probability(self.auroc['X'][0], self.auroc['Y'][0], auroc)

        # overall score: mean of -log10 of the two p-values
        score = np.mean(-np.log10([P_AUROC, P_AUPR]))

        return {'auroc':auroc, 'aupr':aupr, 'pval_aupr': P_AUPR,
                'pval_auroc':P_AUROC, 'score':score}

    def _probability(self, X, Y, x):
        """Return the sum of ``Y * dx`` over the entries where ``X >= x``.

        ``dx`` is taken as ``X[2] - X[1]``, so ``X`` needs at least three
        entries. Presumably ``X`` is a uniformly spaced grid and ``Y`` a
        density sampled on it, which would make the result an upper-tail
        probability -- confirm against the content of the .mat files.

        :param X: array of grid positions.
        :param Y: array of values, indexable by a boolean mask built on X.
        :param x: threshold (the observed metric).
        """
        dx = X[2] - X[1]
        return  sum( Y[X>=x] * dx )