Example #1
0
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome using Globus API. Available
    species listed below. Use comma to give a list of species to download. For
    example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum

    The downloader will prompt you to enter Phytozome user name and password
    during downloading. Please register for a login at:
    https://phytozome.jgi.doe.gov/pz/portal.html.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser and to set_usage below), so it is runtime output
    # and must keep its `%prog` placeholders.
    from jcvi.apps.biomart import GlobusXMLParser

    p = OptionParser(phytozome.__doc__)
    p.add_option(
        "--version",
        default="12",
        choices=("9", "10", "11", "12", "12_unrestricted", "13"),
        help="Phytozome version",
    )
    p.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly",
    )
    p.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    p.set_downloader()
    opts, args = p.parse_args(args)

    downloader = opts.downloader
    # Local file that holds the directory listing for the selected version
    directory_listing = ".phytozome_directory_V{}.xml".format(opts.version)
    # Get directory listing
    base_url = "http://genome.jgi.doe.gov"
    dlist = "{}/ext-api/downloads/get-directory?organism=PhytozomeV{}".format(
        base_url, opts.version
    )

    # Make sure we have valid cookies
    cookies = get_cookies()
    if cookies is None:
        logging.error("Error fetching cookies ... cleaning up")
        FileShredder([directory_listing])
        sys.exit(1)

    # Proceed to use the cookies and download the species list
    try:
        download(
            dlist,
            filename=directory_listing,
            cookies=cookies,
            downloader=downloader,
        )
        g = GlobusXMLParser(directory_listing)
    except BaseException:
        # Deliberately broad: whatever stops the download or the parsing
        # (including Ctrl-C), remove the listing and the cookies before
        # exiting so that a partial or stale file is not left behind.
        logging.error("Error downloading directory listing ... cleaning up")
        FileShredder([directory_listing, cookies])
        sys.exit(1)

    # The list of available species is only known after the listing has been
    # fetched, so the usage text is extended with it at this point.
    genomes = g.get_genomes()
    valid_species = genomes.keys()
    species_tile = tile(valid_species)
    p.set_usage("\n".join((phytozome.__doc__, species_tile)))

    if len(args) != 1:
        sys.exit(not p.print_help())

    (species,) = args
    if species == "all":
        species = ",".join(valid_species)

    species = species.split(",")
    for s in species:
        res = download_species_phytozome(
            genomes,
            s,
            valid_species,
            base_url,
            cookies,
            assembly=opts.assembly,
            downloader=downloader,
        )
        if not res:
            # Nothing was fetched for this species: there is no GFF/CDS to
            # look up or format, so move on to the next one instead of
            # dereferencing an empty (or missing) result.
            logging.error("No files downloaded")
            continue
        gff, fa = res.get("gff"), res.get("cds")
        if opts.format:
            format_bed_and_cds(s, gff, fa)