def test_index_ensembl_files(self):
        """Create the saccharomyces_cerevisiae / sacCer3 ensembl transcript datasource and check its files.

        The test passes when both ``out/ensembl/sacCer3`` and
        ``out/ensembl/ensembl.config`` exist after the datasource has been
        created and the config file is not empty.
        """
        output_dir = "out/"
        genome_build_path = output_dir + "ensembl/sacCer3"
        config_path = output_dir + "ensembl/ensembl.config"

        GenomeBuildInstallUtils.create_ensembl_transcript_datasource(
            genome_build="sacCer3",
            ensembl_species="saccharomyces_cerevisiae")

        # Both the genome build entry and the config file must be present.
        for expected_path in (genome_build_path, config_path):
            self.assertTrue(os.path.exists(expected_path))

        config_size = os.stat(config_path).st_size
        self.assertTrue(config_size > 0, "generated config file (" + output_dir + "ensembl/ensembl.config) is empty.")
    def test_index_ensembl_files(self):
        """Check that creating the ensembl transcript datasource leaves the expected files behind.

        Runs the datasource creation for saccharomyces_cerevisiae (genome
        build sacCer3), then verifies that ``out/ensembl/sacCer3`` and
        ``out/ensembl/ensembl.config`` exist and that the config has content.
        """
        output_dir = "out/"
        datasource_kwargs = {
            "ensembl_species": "saccharomyces_cerevisiae",
            "genome_build": "sacCer3",
        }
        GenomeBuildInstallUtils.create_ensembl_transcript_datasource(**datasource_kwargs)

        build_entry = output_dir + "ensembl/sacCer3"
        config_file = output_dir + "ensembl/ensembl.config"
        self.assertTrue(os.path.exists(build_entry))
        self.assertTrue(os.path.exists(config_file))

        # An empty config file means the generation step wrote nothing useful.
        config_stat = os.stat(config_file)
        empty_msg = "generated config file (" + output_dir + "ensembl/ensembl.config) is empty."
        self.assertTrue(config_stat.st_size > 0, empty_msg)
    def test_previous_release_download(self):
        """Download ensembl release 68 reference data and check that a non-empty cdna file arrived.

        This test needs an internet connection and will fail without one.
        """
        release_num = "68"
        download_dir = "out/test_ensembl_download_previous/"

        # Recreate the download directory (removeDir presumably deletes any
        # copy left over from an earlier run -- confirm in MutUtils).
        MutUtils.removeDir(download_dir)
        os.mkdir(download_dir)
        GenomeBuildInstallUtils.download_reference_data_from_ensembl(
            download_dir, "saccharomyces_cerevisiae", release=release_num)

        # Pick the first directory entry whose name carries the
        # release-specific cdna marker; None when nothing matches.
        cdna_marker = "." + release_num + ".cdna."
        transcript_file = next(
            (entry for entry in os.listdir(download_dir) if cdna_marker in entry),
            None)

        self.assertIsNotNone(transcript_file)

        transcript_size = os.stat(download_dir + transcript_file).st_size
        self.assertTrue(transcript_size > 0, "downloaded transcript file (" + transcript_file + ") is empty.")
    def test_current_download(self):
        """Download ensembl reference data without naming a release and check the cdna file.

        This test needs an internet connection and can be slow.
        """
        # Example of the kind of FTP location involved:
        # ftp://ftp.ensembl.org/pub/release-71/fasta/saccharomyces_cerevisiae/cdna/
        download_dir = "out/test_ensembl_download/"

        # Recreate the download directory (removeDir presumably deletes any
        # copy left over from an earlier run -- confirm in MutUtils).
        MutUtils.removeDir(download_dir)
        os.mkdir(download_dir)
        GenomeBuildInstallUtils.download_reference_data_from_ensembl(
            download_dir, "saccharomyces_cerevisiae")

        # First directory entry with ".cdna." in its name; None when absent.
        transcript_file = next(
            (entry for entry in os.listdir(download_dir) if ".cdna." in entry),
            None)

        self.assertIsNotNone(transcript_file)

        transcript_size = os.stat(download_dir + transcript_file).st_size
        self.assertTrue(transcript_size > 0, "downloaded transcript file (" + transcript_file + ") is empty.")
    def test_previous_release_download(self):
        """Fetch an older ensembl release (68) and verify the transcript (cdna) file is present and non-empty.

        This test needs an internet connection and will fail without one.
        """
        download_dir = "out/test_ensembl_download_previous/"
        release_num = "68"

        # Work in a newly made directory; removeDir is expected to clear out
        # whatever a previous run left there (confirm in MutUtils).
        MutUtils.removeDir(download_dir)
        os.mkdir(download_dir)

        GenomeBuildInstallUtils.download_reference_data_from_ensembl(
            download_dir, "saccharomyces_cerevisiae", release=release_num)

        # Keep only the names tagged with this release's cdna marker and take
        # the first one in listing order, or None when there is no match.
        release_tag = "." + release_num + ".cdna."
        candidates = (name for name in os.listdir(download_dir) if release_tag in name)
        transcript_file = next(candidates, None)

        self.assertIsNotNone(transcript_file)

        downloaded_stat = os.stat(download_dir + transcript_file)
        empty_msg = "downloaded transcript file (" + transcript_file + ") is empty."
        self.assertTrue(downloaded_stat.st_size > 0, empty_msg)
    def test_current_download(self):
        """Fetch ensembl reference data with no explicit release and verify the cdna file is non-empty.

        This test needs an internet connection and can be slow.
        """
        # Example of the kind of FTP location involved:
        # ftp://ftp.ensembl.org/pub/release-71/fasta/saccharomyces_cerevisiae/cdna/
        download_dir = "out/test_ensembl_download/"

        # Work in a newly made directory; removeDir is expected to clear out
        # whatever a previous run left there (confirm in MutUtils).
        MutUtils.removeDir(download_dir)
        os.mkdir(download_dir)

        GenomeBuildInstallUtils.download_reference_data_from_ensembl(
            download_dir, "saccharomyces_cerevisiae")

        # Take the first listed name containing ".cdna.", or None if missing.
        candidates = (name for name in os.listdir(download_dir) if ".cdna." in name)
        transcript_file = next(candidates, None)

        self.assertIsNotNone(transcript_file)

        downloaded_stat = os.stat(download_dir + transcript_file)
        empty_msg = "downloaded transcript file (" + transcript_file + ") is empty."
        self.assertTrue(downloaded_stat.st_size > 0, empty_msg)
    # Setup argument parser
    epilog = '''
    Supported species:

    ''' + "\n\t".join(VALID_ENSEMBL_SPECIES)

    desc = ''' Download ENSEMBL files from FTP server.  Please see -h for supported species list.
    '''
    parser = ArgumentParser(description=desc,
                            formatter_class=RawDescriptionHelpFormatter,
                            epilog=epilog)
    parser.add_argument('speciesName', type=str, help="Species name")
    parser.add_argument('downloadDir',
                        type=str,
                        help="Where to download files.")

    # Process arguments
    args = parser.parse_args()

    return args


if __name__ == "__main__":
    # Script entry point: read the species name and destination directory
    # from the command line, then download the ensembl reference data.
    args = parseOptions()
    species, destDir = args.speciesName, args.downloadDir

    # Create the destination directory (parents included) when it is missing.
    if not os.path.exists(destDir):
        os.makedirs(destDir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(destDir, species)
Beispiel #8
0
__author__ = 'lichtens'
from oncotator.utils.install.GenomeBuildInstallUtils import GenomeBuildInstallUtils
from oncotator.utils.install.GenomeBuildInstallUtils import VALID_ENSEMBL_SPECIES

def parseOptions():
    """Parse the command line of the ENSEMBL download script.

    Two positional arguments are accepted: ``speciesName`` and
    ``downloadDir``.  The help epilog lists every entry of
    VALID_ENSEMBL_SPECIES.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    # Epilog: a heading followed by the supported species, tab-separated lines.
    species_listing = "\n\t".join(VALID_ENSEMBL_SPECIES)
    epilog = '''
    Supported species:

    ''' + species_listing

    desc = ''' Download ENSEMBL files from FTP server.  Please see -h for supported species list.
    '''

    # RawDescriptionHelpFormatter keeps the description and epilog laid out
    # as written instead of re-wrapping them.
    parser = ArgumentParser(
        description=desc,
        epilog=epilog,
        formatter_class=RawDescriptionHelpFormatter)

    positionals = (
        ('speciesName', "Species name"),
        ('downloadDir', "Where to download files."),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, type=str, help=arg_help)

    return parser.parse_args()

if __name__ == "__main__":
    # Entry point when run as a script: parse the command line, make sure
    # the download directory exists, then fetch the ensembl reference data.
    args = parseOptions()
    destDir = args.downloadDir
    species = args.speciesName

    if os.path.exists(destDir):
        pass  # Destination already present; nothing to create.
    else:
        os.makedirs(destDir)

    GenomeBuildInstallUtils.download_reference_data_from_ensembl(
        destDir, species)