def test_index_ensembl_files(self):
    """Create an ensembl transcript datasource and check the generated files.

    The datasource is built for saccharomyces_cerevisiae / sacCer3.  The test
    then checks that the genome-build directory and the config file exist
    under ``out/`` and that the config file is not empty.
    """
    output_dir = "out/"
    build_dir = output_dir + "ensembl/sacCer3"
    config_path = output_dir + "ensembl/ensembl.config"

    GenomeBuildInstallUtils.create_ensembl_transcript_datasource(
        ensembl_species="saccharomyces_cerevisiae",
        genome_build="sacCer3")

    # Both artifacts must exist before the size of the config is inspected.
    for expected_path in (build_dir, config_path):
        self.assertTrue(os.path.exists(expected_path))

    config_size = os.stat(config_path).st_size
    self.assertTrue(
        config_size > 0,
        "generated config file (" + output_dir + "ensembl/ensembl.config) is empty.")
def test_index_ensembl_files(self):
    """Check the files produced when an ensembl datasource is created.

    Builds the saccharomyces_cerevisiae / sacCer3 datasource, then asserts
    that ``out/ensembl/sacCer3`` and ``out/ensembl/ensembl.config`` exist and
    that the config file has a non-zero size.
    """
    output_dir = "out/"

    GenomeBuildInstallUtils.create_ensembl_transcript_datasource(
        ensembl_species="saccharomyces_cerevisiae",
        genome_build="sacCer3")

    genome_build_path = output_dir + "ensembl/sacCer3"
    self.assertTrue(os.path.exists(genome_build_path))

    config_file = output_dir + "ensembl/ensembl.config"
    self.assertTrue(os.path.exists(config_file))

    # An empty config file counts as a failed generation.
    empty_message = ("generated config file (" + output_dir +
                     "ensembl/ensembl.config) is empty.")
    self.assertTrue(os.stat(config_file).st_size > 0, empty_message)
def test_previous_release_download(self):
    """Download the release-68 ensembl transcript package and check it.

    This test needs an internet connection and will fail without one.
    """
    release_num = "68"
    download_dir = "out/test_ensembl_download_previous/"

    # Reset the target directory (removeDir, then mkdir) before downloading.
    MutUtils.removeDir(download_dir)
    os.mkdir(download_dir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(
        download_dir, "saccharomyces_cerevisiae", release=release_num)

    # The first listed file whose name carries the release number and the
    # cdna marker is taken as the transcript file.
    release_marker = "." + release_num + ".cdna."
    transcript_file = next(
        (name for name in os.listdir(download_dir) if release_marker in name),
        None)
    self.assertIsNotNone(transcript_file)

    file_size = os.stat(download_dir + transcript_file).st_size
    self.assertTrue(
        file_size > 0,
        "downloaded transcript file (" + transcript_file + ") is empty.")
def test_current_download(self):
    """Download the current ensembl transcript package and check it.

    This test needs an internet connection and can be slow.
    """
    # Example location of the data:
    # ftp://ftp.ensembl.org/pub/release-71/fasta/saccharomyces_cerevisiae/cdna/
    download_dir = "out/test_ensembl_download/"

    # Reset the target directory (removeDir, then mkdir) before downloading.
    MutUtils.removeDir(download_dir)
    os.mkdir(download_dir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(
        download_dir, "saccharomyces_cerevisiae")

    # The first listed file whose name contains the cdna marker is taken as
    # the transcript file.
    transcript_file = next(
        (name for name in os.listdir(download_dir) if ".cdna." in name),
        None)
    self.assertIsNotNone(transcript_file)

    file_size = os.stat(download_dir + transcript_file).st_size
    self.assertTrue(
        file_size > 0,
        "downloaded transcript file (" + transcript_file + ") is empty.")
def test_previous_release_download(self):
    """Fetch an older (release 68) ensembl transcript package.

    Requires an internet connection; without one the test fails.
    """
    download_dir = "out/test_ensembl_download_previous/"
    release_num = "68"

    # removeDir followed by mkdir: the download lands in a freshly made dir.
    MutUtils.removeDir(download_dir)
    os.mkdir(download_dir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(
        download_dir, "saccharomyces_cerevisiae", release=release_num)

    wanted = "." + release_num + ".cdna."
    candidates = (entry for entry in os.listdir(download_dir)
                  if wanted in entry)
    # Only the first matching entry is used; None means nothing matched.
    transcript_file = next(candidates, None)
    self.assertIsNotNone(transcript_file)

    transcript_stat = os.stat(download_dir + transcript_file)
    self.assertTrue(
        transcript_stat.st_size > 0,
        "downloaded transcript file (" + transcript_file + ") is empty.")
def test_current_download(self):
    """Fetch the current ensembl transcript package.

    Requires an internet connection and can be slow.
    """
    # Example location of the data:
    # ftp://ftp.ensembl.org/pub/release-71/fasta/saccharomyces_cerevisiae/cdna/
    download_dir = "out/test_ensembl_download/"

    # removeDir followed by mkdir: the download lands in a freshly made dir.
    MutUtils.removeDir(download_dir)
    os.mkdir(download_dir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(
        download_dir, "saccharomyces_cerevisiae")

    candidates = (entry for entry in os.listdir(download_dir)
                  if ".cdna." in entry)
    # Only the first matching entry is used; None means nothing matched.
    transcript_file = next(candidates, None)
    self.assertIsNotNone(transcript_file)

    transcript_stat = os.stat(download_dir + transcript_file)
    self.assertTrue(
        transcript_stat.st_size > 0,
        "downloaded transcript file (" + transcript_file + ") is empty.")
# Setup argument parser epilog = ''' Supported species: ''' + "\n\t".join(VALID_ENSEMBL_SPECIES) desc = ''' Download ENSEMBL files from FTP server. Please see -h for supported species list. ''' parser = ArgumentParser(description=desc, formatter_class=RawDescriptionHelpFormatter, epilog=epilog) parser.add_argument('speciesName', type=str, help="Species name") parser.add_argument('downloadDir', type=str, help="Where to download files.") # Process arguments args = parser.parse_args() return args if __name__ == "__main__": args = parseOptions() species = args.speciesName destDir = args.downloadDir if not os.path.exists(destDir): os.makedirs(destDir) GenomeBuildInstallUtils.download_reference_data_from_ensembl( destDir, species)
__author__ = 'lichtens'

from oncotator.utils.install.GenomeBuildInstallUtils import GenomeBuildInstallUtils
from oncotator.utils.install.GenomeBuildInstallUtils import VALID_ENSEMBL_SPECIES


def parseOptions():
    """Parse the command line of the ENSEMBL download script.

    Two positional arguments are defined: ``speciesName`` and ``downloadDir``.
    The help epilog lists every entry of VALID_ENSEMBL_SPECIES.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    # Help texts: the epilog enumerates the supported species.
    supported_species = "\n\t".join(VALID_ENSEMBL_SPECIES)
    epilog = ''' Supported species: ''' + supported_species
    desc = ''' Download ENSEMBL files from FTP server. Please see -h for supported species list. '''

    arg_parser = ArgumentParser(
        description=desc,
        formatter_class=RawDescriptionHelpFormatter,
        epilog=epilog)
    arg_parser.add_argument('speciesName', type=str, help="Species name")
    arg_parser.add_argument('downloadDir', type=str,
                            help="Where to download files.")

    return arg_parser.parse_args()


if __name__ == "__main__":
    args = parseOptions()
    species = args.speciesName
    destDir = args.downloadDir

    # Create the destination directory when it is not there yet.
    destination_missing = not os.path.exists(destDir)
    if destination_missing:
        os.makedirs(destDir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(
        destDir, species)