Ejemplo n.º 1
0
def download_gencode_gff(gff_url, output_dir, decompress):
    """
    Fetch the GENCODE GFF3 file.

    ``gff_url`` is handed to ``download`` with the "gff" subdirectory of
    ``output_dir`` as the destination; ``decompress`` is forwarded unchanged.
    Updated 2020-02-09.
    """
    gff_dir = os.path.join(output_dir, "gff")
    download(url=gff_url, output_dir=gff_dir, decompress=decompress)
Ejemplo n.º 2
0
def download_refseq_gtf(build, release_url, output_dir, decompress):
    """
    Fetch the RefSeq GTF file.

    The remote file name is ``<build>_latest_genomic.gtf.gz``, combined with
    ``release_url`` through ``paste_url``. The result is passed to
    ``download`` with the "gtf" subdirectory of ``output_dir`` as the
    destination; ``decompress`` is forwarded unchanged.
    Updated 2020-02-09.
    """
    gtf_name = build + "_latest_genomic.gtf.gz"
    gtf_url = paste_url(release_url, gtf_name)
    gtf_dir = os.path.join(output_dir, "gtf")
    download(url=gtf_url, output_dir=gtf_dir, decompress=decompress)
Ejemplo n.º 3
0
def download_gencode_genome(genome_fasta_url, output_dir, decompress):
    """
    Fetch the GENCODE genome FASTA.

    ``genome_fasta_url`` is handed to ``download`` with the "genome"
    subdirectory of ``output_dir`` as the destination; ``decompress`` is
    forwarded unchanged.
    Updated 2020-02-09.
    """
    genome_dir = os.path.join(output_dir, "genome")
    download(
        url=genome_fasta_url, output_dir=genome_dir, decompress=decompress)
Ejemplo n.º 4
0
def download_refseq_transcriptome(build, release_url, output_dir, decompress):
    """
    Fetch the RefSeq transcriptome FASTA.

    The remote file name is ``<build>_latest_rna.fna.gz``, combined with
    ``release_url`` through ``paste_url``. The result is passed to
    ``download`` with the "transcriptome" subdirectory of ``output_dir`` as
    the destination; ``decompress`` is forwarded unchanged.
    Updated 2020-02-09.
    """
    rna_name = build + "_latest_rna.fna.gz"
    rna_url = paste_url(release_url, rna_name)
    transcriptome_dir = os.path.join(output_dir, "transcriptome")
    download(url=rna_url, output_dir=transcriptome_dir, decompress=decompress)
Ejemplo n.º 5
0
def download_flybase_gff(release_url, output_dir, decompress, dmel):
    """
    Fetch the FlyBase GFF3 file together with its md5sum.txt.

    Both URLs are built with ``paste_url`` from ``release_url`` plus "gff",
    and both files go to the "gff" subdirectory of ``output_dir``. Only the
    GFF download receives ``decompress``. ``dmel`` is inserted into the
    remote file name (``dmel-all-<dmel>.gff.gz``).
    Updated 2020-02-09.
    """
    local_dir = os.path.join(output_dir, "gff")
    remote_dir = paste_url(release_url, "gff")

    # Checksum list first, fetched without the decompress argument.
    checksum_url = paste_url(remote_dir, "md5sum.txt")
    download(url=checksum_url, output_dir=local_dir)

    gff_name = "dmel-all-" + dmel + ".gff.gz"
    gff_url = paste_url(remote_dir, gff_name)
    download(url=gff_url, output_dir=local_dir, decompress=decompress)
Ejemplo n.º 6
0
def download_flybase_genome(release_url, output_dir, decompress, dmel):
    """
    Fetch the FlyBase genome FASTA together with its md5sum.txt.

    Both URLs are built with ``paste_url`` from ``release_url`` plus "fasta",
    and both files go to the "genome" subdirectory of ``output_dir``. Only
    the FASTA download receives ``decompress``. ``dmel`` is inserted into the
    remote file name (``dmel-all-aligned-<dmel>.fasta.gz``).
    Updated 2020-02-09.
    """
    local_dir = os.path.join(output_dir, "genome")
    remote_dir = paste_url(release_url, "fasta")

    # Checksum list first, fetched without the decompress argument.
    checksum_url = paste_url(remote_dir, "md5sum.txt")
    download(url=checksum_url, output_dir=local_dir)

    genome_name = "dmel-all-aligned-" + dmel + ".fasta.gz"
    genome_url = paste_url(remote_dir, genome_name)
    download(url=genome_url, output_dir=local_dir, decompress=decompress)
Ejemplo n.º 7
0
def download_ensembl_transcriptome(organism, build, release_url, output_dir,
                                   decompress):
    """
    Fetch the Ensembl transcriptome (cDNA) FASTA plus README and CHECKSUMS.

    URLs are built with ``paste_url`` from ``release_url``, "fasta", the
    lowercased ``organism`` and "cdna". All three files go to the
    "transcriptome" subdirectory of ``output_dir``. Only the FASTA download
    (``<organism>.<build>.cdna.all.fa.gz``) receives ``decompress``.
    Updated 2020-02-09.
    """
    local_dir = os.path.join(output_dir, "transcriptome")
    remote_dir = paste_url(release_url, "fasta", organism.lower(), "cdna")
    fasta_name = ".".join((organism, build, "cdna.all.fa.gz"))

    # (url, extra keyword arguments) pairs, processed in this order.
    jobs = [
        (paste_url(remote_dir, "README"), {}),
        (paste_url(remote_dir, "CHECKSUMS"), {}),
        (paste_url(remote_dir, fasta_name), {"decompress": decompress}),
    ]
    for url, extra in jobs:
        download(url=url, output_dir=local_dir, **extra)
Ejemplo n.º 8
0
def download_ensembl_gff(organism, build, release, release_url, output_dir,
                         decompress):
    """
    Download Ensembl GFF3 file.

    Fetches README, CHECKSUMS and ``<organism>.<build>.<release>.gff3.gz``
    into the "gff" subdirectory of ``output_dir``. URLs are built with
    ``paste_url`` from ``release_url``, "gff3" and the lowercased
    ``organism``. For ``Homo_sapiens`` and ``Mus_musculus`` the extra
    ``<organism>.<build>.<release>.chr_patch_hapl_scaff.gff3.gz`` file is
    fetched as well.

    Args:
        organism: Underscore-separated organism name (e.g. "Homo_sapiens");
            it is spliced verbatim into the remote file names.
        build: Genome build string used in the remote file names.
        release: Release string used in the remote file names (must be a
            ``str``, since it is concatenated).
        release_url: Base URL handed to ``paste_url``.
        output_dir: Parent directory; files land in its "gff" subdirectory.
        decompress: Forwarded to ``download`` for the GFF3 files only.
    """
    output_dir = os.path.join(output_dir, "gff")
    base_url = paste_url(release_url, "gff3", organism.lower())
    file_stem = organism + "." + build + "." + release
    readme_url = paste_url(base_url, "README")
    checksums_url = paste_url(base_url, "CHECKSUMS")
    gff_url = paste_url(base_url, file_stem + ".gff3.gz")
    download(url=readme_url, output_dir=output_dir)
    download(url=checksums_url, output_dir=output_dir)
    download(url=gff_url, output_dir=output_dir, decompress=decompress)
    # The organism value is used verbatim in the file names above, so it is
    # underscore-separated; the comparison must use the same spelling or the
    # patch/haplotype/scaffold file is never fetched.
    if organism in ("Homo_sapiens", "Mus_musculus"):
        gff_patch_url = paste_url(
            base_url, file_stem + ".chr_patch_hapl_scaff.gff3.gz")
        download(url=gff_patch_url,
                 output_dir=output_dir,
                 decompress=decompress)
Ejemplo n.º 9
0
def download_ensembl_genome(organism, build, release_url, output_dir,
                            decompress):
    """
    Fetch the Ensembl genome (DNA) FASTA plus README and CHECKSUMS.

    URLs are built with ``paste_url`` from ``release_url``, "fasta", the
    lowercased ``organism`` and "dna". All three files go to the "genome"
    subdirectory of ``output_dir``. The FASTA file name uses the
    "primary_assembly" variant for Homo_sapiens and Mus_musculus and
    "toplevel" for every other organism. Only the FASTA download receives
    ``decompress``.
    Updated 2020-02-09.
    """
    local_dir = os.path.join(output_dir, "genome")
    remote_dir = paste_url(release_url, "fasta", organism.lower(), "dna")
    readme_url = paste_url(remote_dir, "README")
    checksums_url = paste_url(remote_dir, "CHECKSUMS")

    uses_primary = organism in ("Homo_sapiens", "Mus_musculus")
    assembly = "primary_assembly" if uses_primary else "toplevel"
    fasta_name = organism + "." + build + ".dna." + assembly + ".fa.gz"
    fasta_url = paste_url(remote_dir, fasta_name)

    download(url=readme_url, output_dir=local_dir)
    download(url=checksums_url, output_dir=local_dir)
    download(url=fasta_url, output_dir=local_dir, decompress=decompress)
Ejemplo n.º 10
0
def download_flybase_transcriptome(release_url, output_dir, decompress, dmel):
    """
    Fetch the FlyBase per-class FASTA files and concatenate them.

    md5sum.txt and six ``dmel-all-<class>-<dmel>.fasta.gz`` files
    (transcript, miRNA, miscRNA, ncRNA, pseudogene, tRNA) are downloaded into
    ``<output_dir>/transcriptome/cat``. If
    ``<output_dir>/transcriptome/dmel-transcriptome-<dmel>.fasta.gz`` does not
    exist yet, it is created by running ``cat`` over the
    ``dmel-all-*.fasta.gz`` glob through ``shell``. When ``decompress`` is
    exactly ``True``, ``decompress_but_keep_original`` is then called on the
    new file. Nothing is concatenated or decompressed if the merged file is
    already present.
    Updated 2020-02-09.
    """
    transcriptome_dir = os.path.join(output_dir, "transcriptome")
    parts_dir = os.path.join(transcriptome_dir, "cat")
    merged_fasta = os.path.join(
        transcriptome_dir, "dmel-transcriptome-" + dmel + ".fasta.gz")
    remote_dir = paste_url(release_url, "fasta")

    download(url=paste_url(remote_dir, "md5sum.txt"), output_dir=parts_dir)

    # Same download order as the individual calls this loop replaces.
    rna_classes = (
        "transcript",
        "miRNA",
        "miscRNA",
        "ncRNA",
        "pseudogene",
        "tRNA",
    )
    for rna_class in rna_classes:
        part_name = "dmel-all-" + rna_class + "-" + dmel + ".fasta.gz"
        download(url=paste_url(remote_dir, part_name), output_dir=parts_dir)

    # Skip the merge when a previous run already produced the merged file.
    if os.path.isfile(merged_fasta):
        return

    print("Concatenating '" + merged_fasta + "'.")
    parts_glob = os.path.join(parts_dir, "dmel-all-*.fasta.gz")
    shell("cat " + parts_glob + " > " + merged_fasta)
    if decompress is True:
        decompress_but_keep_original(merged_fasta)