def download_gencode_gff(gff_url, output_dir, decompress):
    """
    Download GENCODE GFF3 file.
    Updated 2020-02-09.

    The file at `gff_url` is passed to `download`, targeting the "gff"
    subdirectory of `output_dir`. `decompress` is forwarded unchanged.
    """
    gff_dir = os.path.join(output_dir, "gff")
    download(url=gff_url, output_dir=gff_dir, decompress=decompress)
def download_refseq_gtf(build, release_url, output_dir, decompress):
    """
    Download RefSeq GTF file.
    Updated 2020-02-09.

    The remote file is "<build>_latest_genomic.gtf.gz" under `release_url`.
    It is passed to `download`, targeting the "gtf" subdirectory of
    `output_dir`. `decompress` is forwarded unchanged.
    """
    gtf_file = build + "_latest_genomic.gtf.gz"
    gtf_url = paste_url(release_url, gtf_file)
    gtf_dir = os.path.join(output_dir, "gtf")
    download(url=gtf_url, output_dir=gtf_dir, decompress=decompress)
def download_gencode_genome(genome_fasta_url, output_dir, decompress):
    """
    Download GENCODE genome FASTA.
    Updated 2020-02-09.

    The file at `genome_fasta_url` is passed to `download`, targeting the
    "genome" subdirectory of `output_dir`. `decompress` is forwarded
    unchanged.
    """
    genome_dir = os.path.join(output_dir, "genome")
    download(url=genome_fasta_url, output_dir=genome_dir, decompress=decompress)
def download_refseq_transcriptome(build, release_url, output_dir, decompress):
    """
    Download RefSeq transcriptome FASTA.
    Updated 2020-02-09.

    The remote file is "<build>_latest_rna.fna.gz" under `release_url`.
    It is passed to `download`, targeting the "transcriptome" subdirectory
    of `output_dir`. `decompress` is forwarded unchanged.
    """
    rna_file = build + "_latest_rna.fna.gz"
    rna_url = paste_url(release_url, rna_file)
    transcriptome_dir = os.path.join(output_dir, "transcriptome")
    download(url=rna_url, output_dir=transcriptome_dir, decompress=decompress)
def download_flybase_gff(release_url, output_dir, decompress, dmel):
    """
    Download FlyBase GFF3 file.
    Updated 2020-02-09.

    Two files are requested from the "gff" location under `release_url`,
    both targeting the "gff" subdirectory of `output_dir`:

    1. "md5sum.txt" (requested without a `decompress` argument).
    2. "dmel-all-<dmel>.gff.gz" (with `decompress` forwarded).
    """
    gff_dir = os.path.join(output_dir, "gff")
    remote_dir = paste_url(release_url, "gff")

    # Checksum file first, then the annotation itself.
    checksum_url = paste_url(remote_dir, "md5sum.txt")
    download(url=checksum_url, output_dir=gff_dir)

    gff_file = "".join(("dmel-all-", dmel, ".gff.gz"))
    gff_url = paste_url(remote_dir, gff_file)
    download(url=gff_url, output_dir=gff_dir, decompress=decompress)
def download_flybase_genome(release_url, output_dir, decompress, dmel):
    """
    Download FlyBase genome FASTA.
    Updated 2020-02-09.

    Two files are requested from the "fasta" location under `release_url`,
    both targeting the "genome" subdirectory of `output_dir`:

    1. "md5sum.txt" (requested without a `decompress` argument).
    2. "dmel-all-aligned-<dmel>.fasta.gz" (with `decompress` forwarded).
    """
    genome_dir = os.path.join(output_dir, "genome")
    remote_dir = paste_url(release_url, "fasta")

    # Checksum file first, then the genome sequence itself.
    checksum_url = paste_url(remote_dir, "md5sum.txt")
    download(url=checksum_url, output_dir=genome_dir)

    genome_file = "".join(("dmel-all-aligned-", dmel, ".fasta.gz"))
    genome_url = paste_url(remote_dir, genome_file)
    download(url=genome_url, output_dir=genome_dir, decompress=decompress)
def download_ensembl_transcriptome(
    organism, build, release_url, output_dir, decompress
):
    """
    Download Ensembl transcriptome FASTA.
    Updated 2020-02-09.

    Files are requested from "fasta/<organism, lowercased>/cdna" under
    `release_url` and target the "transcriptome" subdirectory of
    `output_dir`, in this order: "README", "CHECKSUMS", then
    "<organism>.<build>.cdna.all.fa.gz". Only the FASTA request forwards
    `decompress`.
    """
    transcriptome_dir = os.path.join(output_dir, "transcriptome")
    cdna_url = paste_url(release_url, "fasta", organism.lower(), "cdna")

    # Build every URL up front, before any download is attempted.
    metadata_urls = [
        paste_url(cdna_url, name) for name in ("README", "CHECKSUMS")
    ]
    cdna_file = "".join((organism, ".", build, ".cdna.all.fa.gz"))
    cdna_fasta_url = paste_url(cdna_url, cdna_file)

    for metadata_url in metadata_urls:
        download(url=metadata_url, output_dir=transcriptome_dir)
    download(
        url=cdna_fasta_url,
        output_dir=transcriptome_dir,
        decompress=decompress,
    )
def download_ensembl_gff(
    organism, build, release, release_url, output_dir, decompress
):
    """
    Download Ensembl GFF3 file.
    Updated 2020-02-09.

    Files are requested from "gff3/<organism, lowercased>" under
    `release_url` and target the "gff" subdirectory of `output_dir`, in this
    order: "README", "CHECKSUMS", then "<organism>.<build>.<release>.gff3.gz".
    For "Homo_sapiens" and "Mus_musculus" the additional
    "<organism>.<build>.<release>.chr_patch_hapl_scaff.gff3.gz" file is
    requested as well.

    Parameters:
        organism: Organism name. It is embedded verbatim in the remote file
            name and lowercased for the remote directory, so the
            underscore-separated form (e.g. "Homo_sapiens") is expected.
        build: Genome build string; second component of the file name.
        release: Release string; third component of the file name. It is
            concatenated with `+`, so it must be a `str`.
        release_url: Base URL of the release.
        output_dir: Parent directory; files target its "gff" subdirectory.
        decompress: Forwarded to `download` for the GFF3 files only (not for
            "README" / "CHECKSUMS").
    """
    output_dir = os.path.join(output_dir, "gff")
    base_url = paste_url(release_url, "gff3", organism.lower())
    # Shared "<organism>.<build>.<release>" prefix of both GFF3 file names.
    file_stem = organism + "." + build + "." + release
    readme_url = paste_url(base_url, "README")
    checksums_url = paste_url(base_url, "CHECKSUMS")
    gff_url = paste_url(base_url, file_stem + ".gff3.gz")
    download(url=readme_url, output_dir=output_dir)
    download(url=checksums_url, output_dir=output_dir)
    download(url=gff_url, output_dir=output_dir, decompress=decompress)
    # The organism must be matched in the same underscore form that is used
    # to build the URLs above (and that download_ensembl_genome checks for).
    # A space-separated name can never match a value that also yields a
    # usable file name, which silently skipped the patch/haplotype download.
    if organism in ("Homo_sapiens", "Mus_musculus"):
        gff_patch_url = paste_url(
            base_url,
            file_stem + ".chr_patch_hapl_scaff.gff3.gz",
        )
        download(
            url=gff_patch_url,
            output_dir=output_dir,
            decompress=decompress,
        )
def download_ensembl_genome(
    organism, build, release_url, output_dir, decompress
):
    """
    Download Ensembl genome FASTA.
    Updated 2020-02-09.

    Files are requested from "fasta/<organism, lowercased>/dna" under
    `release_url` and target the "genome" subdirectory of `output_dir`, in
    this order: "README", "CHECKSUMS", then
    "<organism>.<build>.dna.<assembly>.fa.gz". The assembly is
    "primary_assembly" for "Homo_sapiens" and "Mus_musculus", and "toplevel"
    for every other organism. Only the FASTA request forwards `decompress`.
    """
    genome_dir = os.path.join(output_dir, "genome")
    dna_url = paste_url(release_url, "fasta", organism.lower(), "dna")

    # Build every URL up front, before any download is attempted.
    metadata_urls = [
        paste_url(dna_url, name) for name in ("README", "CHECKSUMS")
    ]
    has_primary_assembly = organism in ("Homo_sapiens", "Mus_musculus")
    assembly = "primary_assembly" if has_primary_assembly else "toplevel"
    genome_file = "".join((organism, ".", build, ".dna.", assembly, ".fa.gz"))
    genome_fasta_url = paste_url(dna_url, genome_file)

    for metadata_url in metadata_urls:
        download(url=metadata_url, output_dir=genome_dir)
    download(
        url=genome_fasta_url,
        output_dir=genome_dir,
        decompress=decompress,
    )
def download_flybase_transcriptome(release_url, output_dir, decompress, dmel):
    """
    Download FlyBase transcriptome FASTA.
    Updated 2020-02-09.

    "md5sum.txt" and six per-class FASTA files
    ("dmel-all-<class>-<dmel>.fasta.gz") are requested from the "fasta"
    location under `release_url`, targeting "transcriptome/cat" under
    `output_dir`. None of these requests passes `decompress`.

    If "transcriptome/dmel-transcriptome-<dmel>.fasta.gz" is not already a
    file, it is created by shelling out to `cat` over
    "dmel-all-*.fasta.gz" in the "cat" directory. When `decompress` is
    exactly `True`, `decompress_but_keep_original` is then called on the
    combined file.
    """
    transcriptome_dir = os.path.join(output_dir, "transcriptome")
    parts_dir = os.path.join(transcriptome_dir, "cat")
    combined_fasta = os.path.join(
        transcriptome_dir, "dmel-transcriptome-" + dmel + ".fasta.gz"
    )
    remote_dir = paste_url(release_url, "fasta")

    download(url=paste_url(remote_dir, "md5sum.txt"), output_dir=parts_dir)
    # Request order matches the original sequence of explicit calls.
    sequence_classes = (
        "transcript",
        "miRNA",
        "miscRNA",
        "ncRNA",
        "pseudogene",
        "tRNA",
    )
    for sequence_class in sequence_classes:
        part_file = "dmel-all-" + sequence_class + "-" + dmel + ".fasta.gz"
        download(url=paste_url(remote_dir, part_file), output_dir=parts_dir)

    # Skip the concatenation when a combined file already exists.
    if not os.path.isfile(combined_fasta):
        print("Concatenating '" + combined_fasta + "'.")
        parts_glob = os.path.join(parts_dir, "dmel-all-*.fasta.gz")
        # The glob is left unquoted so the shell expands it.
        shell("cat " + parts_glob + " > " + combined_fasta)

    # NOTE(review): assumed to run whether or not concatenation happened in
    # this call; confirm against the intended nesting of this check.
    if decompress is True:
        decompress_but_keep_original(combined_fasta)