Beispiel #1
0
    def __init__(self, gds_name, remove_unknown=None):
        """ Load a single GEO DataSet so it can be served as a :obj:`Orange.data.Table`.

        The constructor builds the object used to retrieve a GEO DataSet (samples and gene expressions).
        The data file is looked up in the local cache directory first; when it is not there, it is
        fetched from `NCBI's GEO FTP site <ftp://ftp.ncbi.nih.gov/pub/geo/DATA/SOFT/GDS/>`_.
        After parsing, ``taxid`` and ``gene_count`` are stored in ``self.info``.

        :param gds_name: NCBI identifier of the data set, written as "GDSn" where "n" is the GDS ID number.

        :param remove_unknown: Threshold for dropping spots whose sample profiles contain unknown values.
                               A spot is dropped when its proportion of samples with unknown values is
                               above ``remove_unknown``. If None, nothing is dropped.

        """

        self.gds_name = gds_name
        soft_file = self.gds_name + '.soft.gz'
        self.filename = serverfiles.localpath(DOMAIN, soft_file)
        gds_ensure_downloaded(self.gds_name)

        # These attributes are set up before parse_file() runs
        # (presumably parse_file() fills them in -- confirm against its body).
        self.spot2gene, self.gene2spots = {}, {}
        self.info = self.gds_data = None
        self.parse_file(remove_unknown=remove_unknown)

        # Record a taxonomy id only when the exact organism search yields
        # exactly one hit; otherwise store None.
        matches = taxonomy.search(self.info["sample_organism"], exact=True)
        if len(matches) == 1:
            self.info["taxid"] = matches[0]
        else:
            self.info["taxid"] = None

        self.genes = sorted(self.gene2spots)
        self.spots = sorted(self.spot2gene)
        self.info["gene_count"] = len(self.genes)
Beispiel #2
0
 def get_data(gds_id, report_genes, transpose, sample_type, title):
     """Download (if needed) and load a GEO DataSet, returning its data named ``title``.

     ``progress`` is not a parameter; it is read from the surrounding scope
     and handed to ``gds_ensure_downloaded``.

     :param gds_id: Identifier of the data set, passed to
         ``gds_ensure_downloaded`` and ``GDS``.
     :param report_genes: Forwarded unchanged to ``GDS.get_data``.
     :param transpose: Forwarded unchanged to ``GDS.get_data``.
     :param sample_type: Forwarded unchanged to ``GDS.get_data``.
     :param title: Value assigned to the ``name`` attribute of the result.
     :return: The object returned by ``GDS.get_data`` with ``name`` set.
     """
     gds_ensure_downloaded(gds_id, progress)
     options = {
         "report_genes": report_genes,
         "transpose": transpose,
         "sample_type": sample_type,
     }
     table = GDS(gds_id).get_data(**options)
     table.name = title
     return table
    def test_gds_data(self):
        """Exercise the download/cache helpers and ``GDS`` loading for the test sample."""
        # A download URL must be available for the sample.
        self.assertIsNotNone(gds_download_url(self.test_sample))

        # Nothing should be cached before the download.
        self.assertFalse(gds_is_cached(self.test_sample))

        # Make sure the cache directory exists, then download into it.
        cache_dir = serverfiles.localpath(DOMAIN)
        try:
            makedirs(cache_dir)
        except OSError:
            # An already existing directory is fine; any other failure
            # must surface.
            if not path.exists(cache_dir):
                raise
        gds_download(self.test_sample)

        # The sample is cached now, so ensuring the download returns None.
        self.assertIsNone(gds_ensure_downloaded(self.test_sample))
        self.assertTrue(gds_is_cached(self.test_sample))

        dataset = GDS(self.test_sample)
        info = dataset.info
        self.assertIsNotNone(info)
        self.assertEqual(info['gene_count'], 9561)
        self.assertEqual(len(info['samples']), 4)
        self.assertEqual(len(info['subsets']), 2)

        self.assertEqual(info['taxid'], self.test_organism)

        # Both orientations must come back as Table instances.
        self.assertIsInstance(dataset.get_data(), Table)
        self.assertIsInstance(dataset.get_data(transpose=True), Table)