コード例 #1
0
def ensembl(args):
    """
    %prog ensembl species

    Retrieve genomes and annotations from ensembl FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog ensembl danio_rerio,gasterosteus_aculeatus
    """
    # NOTE: the docstring above is handed to the option parser as its usage
    # text, so it is runtime data and must not be reworded casually.
    parser = OptionParser(ensembl.__doc__)
    parser.add_option("--version", default="75", help="Ensembl version")
    opts, args = parser.parse_args(args)

    release_url = "ftp://ftp.ensembl.org/pub/release-{0}/".format(opts.version)

    # Entries of the fasta/ listing without a dot are treated as species names.
    valid_species = []
    for entry in ls_ftp(release_url + "fasta/"):
        if "." not in entry:
            valid_species.append(entry)

    # Extend the usage text with the output of tile() for the species found.
    parser.set_usage("\n".join((ensembl.__doc__, tile(valid_species))))

    # Exactly one positional argument (the comma-separated list) is expected.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    for name in args[0].split(","):
        download_species_ensembl(name, valid_species, release_url)
コード例 #2
0
ファイル: fetch.py プロジェクト: yangjl/jcvi
def ensembl(args):
    """
    %prog ensembl species

    Retrieve genomes and annotations from ensembl FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog ensembl danio_rerio,gasterosteus_aculeatus
    """
    # NOTE: the docstring above is reused as the parser's usage text, so it is
    # part of the program's output.
    parser = OptionParser(ensembl.__doc__)
    parser.add_option(
        "--version",
        default="75",
        help="Ensembl version [default: %default]",
    )
    opts, args = parser.parse_args(args)

    release_url = "ftp://ftp.ensembl.org/pub/release-{0}/".format(opts.version)
    listing = ls_ftp(release_url + "fasta/")
    # Keep only the listing entries that contain no dot.
    valid_species = [entry for entry in listing if "." not in entry]

    # Usage text = original docstring followed by tile()'s rendering of the
    # species names.
    usage = "\n".join((ensembl.__doc__, tile(valid_species)))
    parser.set_usage(usage)

    # A single positional argument is required; otherwise show help and exit.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    requested = args[0].split(",")
    for name in requested:
        download_species_ensembl(name, valid_species, release_url)
コード例 #3
0
ファイル: fetch.py プロジェクト: yangjl/jcvi
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the parser as usage text, so it
    # is runtime data.
    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    opts, args = parser.parse_args(args)

    base_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        opts.version
    )

    # Listing entries without a dot are treated as species names.
    valid_species = []
    for entry in ls_ftp(base_url):
        if "." not in entry:
            valid_species.append(entry)

    # Append tile()'s rendering of the species names to the usage text.
    parser.set_usage("\n".join((phytozome.__doc__, tile(valid_species))))

    # Exactly one positional argument is expected.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    for name in args[0].split(","):
        download_species_phytozome(
            name, valid_species, base_url, assembly=opts.assembly
        )
コード例 #4
0
def test_ls_ftp():
    """Check ls_ftp against the Ensembl release-75 fasta directory listing."""
    from jcvi.apps.base import ls_ftp

    fasta_dir = "ftp://ftp.ensembl.org/pub/release-75/fasta/"
    # Entries without a dot are treated as species names.
    species_dirs = [entry for entry in ls_ftp(fasta_dir) if "." not in entry]

    for expected in ("saccharomyces_cerevisiae", "gorilla_gorilla"):
        assert expected in species_dirs
    # The count is pinned to what this test expects for release 75.
    assert len(species_dirs) == 67
コード例 #5
0
ファイル: fetch.py プロジェクト: tanghaibao/jcvi
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the parser as usage text, so it
    # is runtime data.
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version",
        default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly [default: %default]",
    )
    parser.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    opts, args = parser.parse_args(args)

    base_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        opts.version
    )
    # Listing entries without a dot are treated as species names.
    valid_species = [entry for entry in ls_ftp(base_url) if "." not in entry]

    # Append tile()'s rendering of the species names to the usage text.
    parser.set_usage("\n".join((phytozome.__doc__, tile(valid_species))))

    # Exactly one positional argument is expected.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    (requested,) = args
    # "all" expands to every species found in the listing.
    if requested == "all":
        requested = ",".join(valid_species)
    names = requested.split(",")

    # Species for which the GFF "ID" attribute is used as key (none so far).
    use_IDs = set()
    # We have to watch out when the gene names and mRNA names mismatch, in
    # which case we just extract the mRNA names
    use_mRNAs = {
        "Cclementina", "Creinhardtii", "Csinensis", "Fvesca",
        "Lusitatissimum", "Mesculenta", "Mguttatus", "Ppersica",
        "Pvirgatum", "Rcommunis", "Sitalica", "Tcacao",
        "Thalophila", "Vcarteri", "Vvinifera", "Zmays",
    }

    for name in names:
        gff, fa = download_species_phytozome(
            name, valid_species, base_url, assembly=opts.assembly
        )
        # Without --format, downloading is all that is requested.
        if not opts.format:
            continue

        if name in use_IDs:
            key = "ID"
        else:
            key = "Name"
        if name in use_mRNAs:
            feature_type = "mRNA"
        else:
            feature_type = "gene"

        gff_args = [
            gff,
            "--type={}".format(feature_type),
            "--key={}".format(key),
            "-o",
            name + ".bed",
        ]
        gff_bed(gff_args)
        fasta_format([fa, name + ".cds", r"--sep=|"])
コード例 #6
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is reused as the parser's usage text, so it is
    # part of the program's output.
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version", default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly", default=False, action="store_true",
        help="Download assembly [default: %default]",
    )
    parser.add_option(
        "--format", default=False, action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    opts, args = parser.parse_args(args)

    site = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        opts.version
    )

    # Listing entries without a dot are treated as species names.
    valid_species = []
    for entry in ls_ftp(site):
        if "." not in entry:
            valid_species.append(entry)

    # Usage text = docstring followed by tile()'s rendering of the names.
    usage = "\n".join((phytozome.__doc__, tile(valid_species)))
    parser.set_usage(usage)

    # Exactly one positional argument is expected.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    selection = args[0]
    # "all" expands to every species found in the listing.
    if selection == "all":
        selection = ",".join(valid_species)

    # Species for which the GFF "ID" attribute is used as key (none so far).
    use_IDs = set()
    # We have to watch out when the gene names and mRNA names mismatch, in
    # which case we just extract the mRNA names
    use_mRNAs = {
        "Cclementina", "Creinhardtii", "Csinensis", "Fvesca",
        "Lusitatissimum", "Mesculenta", "Mguttatus", "Ppersica",
        "Pvirgatum", "Rcommunis", "Sitalica", "Tcacao",
        "Thalophila", "Vcarteri", "Vvinifera", "Zmays",
    }

    for sp in selection.split(","):
        gff, fa = download_species_phytozome(
            sp, valid_species, site, assembly=opts.assembly
        )
        if opts.format:
            key = "ID" if sp in use_IDs else "Name"
            ttype = "mRNA" if sp in use_mRNAs else "gene"
            bedfile = sp + ".bed"
            cdsfile = sp + ".cds"
            gff_bed(
                [gff, "--type={}".format(ttype), "--key={}".format(key),
                 "-o", bedfile]
            )
            fasta_format([fa, cdsfile, r"--sep=|"])
コード例 #7
0
def download_species_ensembl(species, valid_species, url):
    """Download every ``.gz`` file in the GTF and CDS directories of a species.

    ``species`` must be a member of ``valid_species`` (checked with an
    assert); ``url`` is the release base URL under which the ``gtf/`` and
    ``fasta/`` directories are looked up.
    """
    assert species in valid_species, "{0} is not in the species list".format(species)

    # We want to download annotation and CDS for the given species
    remote_dirs = (
        urljoin(url, "gtf/{0}".format(species)),
        urljoin(url, "fasta/{0}/cds".format(species)),
    )

    for remote_dir in remote_dirs:
        # Collect the gzipped entries first, then fetch them one by one.
        gz_names = [name for name in ls_ftp(remote_dir) if name.endswith(".gz")]
        for name in gz_names:
            download(urljoin(remote_dir, name))
コード例 #8
0
ファイル: fetch.py プロジェクト: yangjl/jcvi
def download_species_ensembl(species, valid_species, url):
    """Fetch the gzipped GTF and CDS files of one Ensembl species.

    Asserts that ``species`` is in ``valid_species``, then lists the
    ``gtf/<species>`` and ``fasta/<species>/cds`` directories below ``url``
    and downloads each entry ending in ``.gz``.
    """
    message = "{0} is not in the species list".format(species)
    assert species in valid_species, message

    # We want to download annotation and CDS for the given species
    subdirs = ("gtf/{0}".format(species), "fasta/{0}/cds".format(species))
    directories = [urljoin(url, subdir) for subdir in subdirs]

    for directory in directories:
        listing = ls_ftp(directory)
        archives = [entry for entry in listing if entry.endswith(".gz")]
        for entry in archives:
            target = urljoin(directory, entry)
            download(target)
コード例 #9
0
def phytozome9(args):
    """
    %prog phytozome9 species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome9 Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the parser as usage text, so it
    # is runtime data.
    p = OptionParser(phytozome9.__doc__)
    p.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly",
    )
    p.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    opts, args = p.parse_args(args)

    # This entry point is pinned to the v9.0 directory of the FTP site.
    version = "9.0"
    url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(version)
    # Listing entries without a dot are treated as species names.
    valid_species = [x for x in ls_ftp(url) if "." not in x]

    # Append tile()'s rendering of the species names to the usage text.
    doc = "\n".join((phytozome9.__doc__, tile(valid_species)))
    p.set_usage(doc)

    # Exactly one positional argument is expected.
    if len(args) != 1:
        sys.exit(not p.print_help())

    (species,) = args
    # "all" expands to every species found in the listing.
    if species == "all":
        species = ",".join(valid_species)

    species = species.split(",")

    for s in species:
        res = download_species_phytozome9(
            s, valid_species, url, assembly=opts.assembly
        )
        if not res:
            logging.error("No files downloaded")
            # Nothing to format for this species; previously execution fell
            # through and used the empty/None result below.
            continue
        gff, cdsfa = res.get("gff"), res.get("cds")
        if opts.format:
            format_bed_and_cds(s, gff, cdsfa)
コード例 #10
0
ファイル: fetch.py プロジェクト: lizhencmb/jcvi
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to the parser as usage text, so it
    # is runtime data.
    from jcvi.formats.gff import bed as gff_bed
    from jcvi.formats.fasta import format as fasta_format

    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version", default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly", default=False, action="store_true",
        help="Download assembly [default: %default]",
    )
    parser.add_option(
        "--format", default=False, action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    opts, args = parser.parse_args(args)

    base_url = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        opts.version
    )

    # Listing entries without a dot are treated as species names.
    valid_species = []
    for entry in ls_ftp(base_url):
        if "." not in entry:
            valid_species.append(entry)

    # Append tile()'s rendering of the species names to the usage text.
    parser.set_usage("\n".join((phytozome.__doc__, tile(valid_species))))

    # Exactly one positional argument is expected.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    for name in args[0].split(","):
        gff, fa = download_species_phytozome(
            name, valid_species, base_url, assembly=opts.assembly
        )
        # Only convert to BED/CDS when --format was given.
        if opts.format:
            gff_bed([gff, "--phytozome", "-o", name + ".bed"])
            fasta_format([fa, name + ".cds", r"--sep=|"])
コード例 #11
0
def download_species_phytozome9(species, valid_species, base_url, assembly=False):
    """Download the annotation, CDS and (optionally) assembly of one species.

    Asserts that ``species`` is in ``valid_species``. Returns a dict holding
    whatever ``download()`` returned under the keys ``"gff"`` and ``"cds"``,
    plus ``"asm"`` when ``assembly`` is true.
    """
    assert species in valid_species, "{} is not in the species list".format(species)

    # We want to download assembly and annotation for given species
    species_url = urljoin(base_url, species)
    readmes = [name for name in ls_ftp(species_url) if name.endswith("_readme.txt")]
    # The "magic number" is the second underscore-separated field of the
    # first readme file name.
    magic = readmes[0].split("_")[1]
    logging.debug("Found magic number for {0}: {1}".format(species, magic))

    prefix = "{0}_{1}".format(species, magic)
    remote = {
        "asm": urljoin(species_url, "assembly/{0}.fa.gz".format(prefix)),
        "gff": urljoin(species_url, "annotation/{0}_gene.gff3.gz".format(prefix)),
        "cds": urljoin(species_url, "annotation/{0}_cds.fa.gz".format(prefix)),
    }

    # The assembly, when requested, is fetched before the annotation files.
    wanted = ["gff", "cds"]
    if assembly:
        wanted.insert(0, "asm")

    downloaded = {}
    for key in wanted:
        downloaded[key] = download(remote[key])
    return downloaded
コード例 #12
0
ファイル: fetch.py プロジェクト: yangjl/jcvi
def download_species_phytozome(species, valid_species, url, assembly=False):
    """Download the annotation, CDS and (optionally) assembly of one species.

    Asserts that ``species`` is in ``valid_species``. Returns a tuple
    ``(gff, fa)`` holding whatever ``download()`` returned for the gene
    annotation and the CDS FASTA (presumably local file names -- confirm
    against ``download``). Callers that ignore the return value are
    unaffected.
    """
    # posixpath.join always joins with "/", which is what a URL needs;
    # os.path.join would use the platform separator instead.
    from posixpath import join as urljoin

    assert species in valid_species, \
        "{0} is not in the species list".format(species)

    # We want to download assembly and annotation for given species
    surl = urljoin(url, species)
    contents = [x for x in ls_ftp(surl) if x.endswith("_readme.txt")]
    # The "magic number" is the second underscore-separated field of the
    # first readme file name.
    magic = contents[0].split("_")[1]
    logging.debug("Found magic number for {0}: {1}".format(species, magic))

    pf = "{0}_{1}".format(species, magic)
    asm_url = urljoin(surl, "assembly/{0}.fa.gz".format(pf))
    ann_url = urljoin(surl, "annotation/{0}_gene.gff3.gz".format(pf))
    cds_url = urljoin(surl, "annotation/{0}_cds.fa.gz".format(pf))
    if assembly:
        download(asm_url)
    # Keep the download results so callers can post-process the files.
    gff = download(ann_url)
    fa = download(cds_url)
    return gff, fa
コード例 #13
0
def download_species_phytozome(species, valid_species, url, assembly=False):
    """Fetch the Phytozome gene annotation and CDS FASTA of one species.

    Asserts that ``species`` is in ``valid_species``; the assembly is also
    fetched when ``assembly`` is true. Returns a tuple ``(gff, fa)`` with
    whatever ``download()`` returned for the annotation and the CDS file
    (presumably local file names -- confirm against ``download``). Callers
    that ignore the return value keep working.
    """
    # URLs must be joined with "/"; posixpath.join guarantees that on every
    # platform, unlike os.path.join.
    from posixpath import join as urljoin

    assert species in valid_species, \
        "{0} is not in the species list".format(species)

    # We want to download assembly and annotation for given species
    surl = urljoin(url, species)
    contents = [x for x in ls_ftp(surl) if x.endswith("_readme.txt")]
    # The "magic number" is the second underscore-separated field of the
    # first readme file name.
    magic = contents[0].split("_")[1]
    logging.debug("Found magic number for {0}: {1}".format(species, magic))

    pf = "{0}_{1}".format(species, magic)
    asm_url = urljoin(surl, "assembly/{0}.fa.gz".format(pf))
    ann_url = urljoin(surl, "annotation/{0}_gene.gff3.gz".format(pf))
    cds_url = urljoin(surl, "annotation/{0}_cds.fa.gz".format(pf))
    if assembly:
        download(asm_url)
    # Hand the download results back instead of discarding them.
    gff = download(ann_url)
    fa = download(cds_url)
    return gff, fa
コード例 #14
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome FTP. Available species
    listed below. Use comma to give a list of species to download. For example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is reused as the parser's usage text, so it is
    # part of the program's output.
    parser = OptionParser(phytozome.__doc__)
    parser.add_option(
        "--version", default="9.0",
        help="Phytozome version [default: %default]",
    )
    parser.add_option(
        "--assembly", default=False, action="store_true",
        help="Download assembly [default: %default]",
    )
    opts, args = parser.parse_args(args)

    site = "ftp://ftp.jgi-psf.org/pub/compgen/phytozome/v{0}/".format(
        opts.version
    )
    listing = ls_ftp(site)
    # Listing entries without a dot are treated as species names.
    valid_species = [entry for entry in listing if "." not in entry]

    # Usage text = docstring followed by tile()'s rendering of the names.
    usage = "\n".join((phytozome.__doc__, tile(valid_species)))
    parser.set_usage(usage)

    # Exactly one positional argument is expected.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    requested = args[0].split(",")
    for name in requested:
        download_species_phytozome(
            name, valid_species, site, assembly=opts.assembly
        )