예제 #1
0
    def _init(self):
        if self._standalone is True:
            return

        # should download files from synapse if required.
        self._download_data('D9C1_goldstandard.gct.zip', 'syn4595275')

        # now unzip and read the gs on the go
        z = ZIP()
        z.loadZIPFile(self.get_pathname('D9C1_goldstandard.gct.zip'))
        data = z.read('D9C1_goldstandard.gct')
        self.goldstandard = pd.read_csv(BytesIO(data), sep='[ \t]',
                skiprows=2, engine='python')
        self.goldstandard.drop(['Description'], axis=1, inplace=True)
        self.goldstandard.set_index('Name', inplace=True)
        self.goldstandard.columns = [x.strip() for x in self.goldstandard.columns]

        # get template for SC1, SC2, SC3
        self._download_data('D9C1_template_sc1.gct.zip', 'syn4595283')
        self.unzip('D9C1_template_sc1.gct.zip')
        self._download_data('D9C1_template_sc2.zip', 'syn4595587')
        self._download_data('D9C1_template_sc3.zip', 'syn4595588')

        # download gold standard for sc2
        filename = self.getpath_gs( 'D9C1_goldstandard_sc2.txt')
        self.gs_priority = pd.read_csv(filename, sep='\t', header=None)
예제 #2
0
    def download_all_data(self):
        """Download all large data sets from Synapse.

        The gold standard archive is fetched first and its ``Answers.txt``
        member is loaded into ``self.gs`` as a tab-separated table. Four
        additional files are then fetched. A five-step progress bar is
        advanced after each fetch.
        """
        # Imported locally so that the block does not rely on the Python 2
        # only ``StringIO`` module; ``io.BytesIO`` exists on Python 2 and 3.
        from io import BytesIO

        pb = progress_bar(5)
        # load the large gold standard file from D5C2 synapse main page
        filename = self._download_data('DREAM5_GoldStandard_probes.zip',
                                       'syn2898469')
        pb.animate(1)
        z = ZIP()
        z.loadZIPFile(filename)
        data = z.read('Answers.txt')
        # NOTE(review): the archive member is presumably returned as raw
        # bytes (as the standard zipfile module does) -- confirm against ZIP.
        self.gs = pd.read_csv(BytesIO(data), sep='\t')

        # download 4 other filenames from dreamtools synapse project
        self._download_data('all_8mers.txt', 'syn4483185')
        pb.animate(2)
        self._download_data('8mers_gs.txt', 'syn4483187')
        pb.animate(3)
        self._download_data('probe35_gs.txt', 'syn4483184')
        pb.animate(4)
        self._download_data('probes35.txt', 'syn4483183')
        pb.animate(5)
예제 #3
0
    def download_all_data(self):
        """Fetch every large data set of the challenge from Synapse.

        The gold standard archive comes first: its ``Answers.txt`` member is
        parsed as a tab-separated table and stored in ``self.gs``. Four more
        files are fetched afterwards. A five-step progress bar is advanced
        after each fetch.
        """
        progress = progress_bar(5)

        # Large gold standard archive from the D5C2 synapse main page
        archive_path = self._download_data('DREAM5_GoldStandard_probes.zip',
                                           'syn2898469')
        progress.animate(1)

        archive = ZIP()
        archive.loadZIPFile(archive_path)
        answers = archive.read('Answers.txt')
        self.gs = pd.read_csv(BytesIO(answers), sep='\t')

        # Four other files from the dreamtools synapse project; the progress
        # bar continues from step 2.
        extras = (
            ('all_8mers.txt', 'syn4483185'),
            ('8mers_gs.txt', 'syn4483187'),
            ('probe35_gs.txt', 'syn4483184'),
            ('probes35.txt', 'syn4483183'),
        )
        for step, (name, synapse_id) in enumerate(extras, start=2):
            self._download_data(name, synapse_id)
            progress.animate(step)
예제 #4
0
    def _score_sc2_sc3(self, filename):
        """Score a SC2/SC3 submission archive.

        :param filename: path to a zip archive holding exactly two files, a
            prediction ending with ``gct`` and a feature file ending with
            ``txt``.
        :return: dictionary ``{'score': value}`` where ``value`` is the mean
            of the Spearman correlations computed between row *i* of the
            prediction and row *i* of the selected gold standard rows.
        :raises AssertionError: if the archive does not hold 2 files or if
            the prediction does not have the shape (2647, 44).
        :raises ValueError: if a file has an unexpected extension or if the
            prediction or the feature file is missing.

        As a side effect, the attributes ``prediction``, ``feature``, ``df1``
        and ``scores`` are set on the instance.
        """
        # looks like exactly same function in sc2/sc3
        # feature file is not used either in original code ?!

        # this should be a zip file with 2 files.
        z = ZIP()
        z.loadZIPFile(filename)
        # there should be 2 files, one ending in gct one in txt
        assert len(z.filenames) == 2, "There should be 2 files in the zip archive"

        layout_error = ("there should be only 2 files. \n" +
                        "One ending with gct extension (prediction)\n" +
                        "One ending with txt extension (feature)")

        prediction = None
        feature = None
        # A dedicated loop name is used so the ``filename`` argument is not
        # overwritten.
        for member in z.filenames:
            if member.endswith('gct'):
                prediction = self._read_gct(z.read(member))
            elif member.endswith('txt'):
                feature = self._read_feature(z.read(member))
                # first column should be the names
                feature.set_index(0, inplace=True)
            else:
                raise ValueError(layout_error)

        # Two members sharing the same extension would leave one of the two
        # inputs undefined; report it explicitly.
        if prediction is None or feature is None:
            raise ValueError(layout_error)

        #assert feature.shape == (2647,10)
        assert prediction.shape == (2647,44)
        self.prediction = prediction
        self.feature = feature

        # in SC2, only a subset of predictive features (2647 out of 17.000) are used
        # ``.ix`` was removed from pandas; ``.loc`` performs the label-based
        # selection of the prioritised names.
        df1 = self.goldstandard.loc[self.gs_priority[0]]
        self.df1 = df1
        df2 = self.prediction
        # Rows are paired by position.
        # NOTE(review): assumes ``.ix[i]`` was resolving the integer ``i`` by
        # position (non-integer or default index) -- confirm for _read_gct.
        scores = [df2.iloc[i].corr(df1.iloc[i], method='spearman')
                  for i in range(len(df1))]
        self.scores = scores
        final_score = sum(scores)/float(len(scores))
        return {'score': final_score}
예제 #5
0
 def unzip(self, filename):
     """Extract every member of the archive *filename*.

     The archive is located with ``self.get_pathname(filename)`` and its
     content is extracted into ``self.directory``.
     """
     from dreamtools.core.ziptools import ZIP
     archive = ZIP()
     archive_path = self.get_pathname(filename)
     archive.loadZIPFile(archive_path)
     archive.extractall(self.directory)
예제 #6
0
 def unzip(self, filename):
     """Unpack the whole archive *filename* into ``self.directory``.

     The path of the archive is resolved with ``self.get_pathname``.
     """
     from dreamtools.core.ziptools import ZIP
     zip_handler = ZIP()
     # Load first, then extract: same order as a left-to-right evaluation.
     zip_handler.loadZIPFile(self.get_pathname(filename))
     zip_handler.extractall(self.directory)
예제 #7
0
def test_zip():
    """Smoke test: loading the path ``'p'`` must not abort the test run.

    Any ordinary error raised by ``loadZIPFile`` is tolerated.
    """
    z = ZIP()
    try:
        z.loadZIPFile('p')
    except Exception:
        # The error type raised for this path is not visible from here, so
        # any ordinary exception is accepted; KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed.
        pass