def phytozome10(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome using Globus API. Available
    species listed below. Use comma to give a list of species to download. For
    example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is user-facing; keep implementation notes in comments like this.
    from jcvi.apps.biomart import GlobusXMLParser

    p = OptionParser(phytozome10.__doc__)
    _, args = p.parse_args(args)

    # Exactly one positional argument (the species list) is required;
    # otherwise print the help text and exit.
    if len(args) != 1:
        sys.exit(not p.print_help())

    # The live fetch of the directory listing (get_cookies() followed by
    # download() of
    # http://genome.jgi.doe.gov/ext-api/downloads/get-directory?organism=PhytozomeV10)
    # is disabled here; a "get-directory.html" file in the current working
    # directory is parsed instead.
    #
    # Use a context manager so the handle is closed even if parsing fails.
    with open("get-directory.html") as fp:
        g = GlobusXMLParser(fp)
        g.parse_folder()
def phytozome(args):
    """
    %prog phytozome species

    Retrieve genomes and annotations from phytozome using Globus API. Available
    species listed below. Use comma to give a list of species to download. For
    example:

    $ %prog phytozome Athaliana,Vvinifera,Osativa,Sbicolor,Slycopersicum

    The downloader will prompt you to enter Phytozome user name and password
    during downloading. Please register for a login at:
    https://phytozome.jgi.doe.gov/pz/portal.html.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text
    # (and later re-used in p.set_usage), so it is user-facing; keep
    # implementation notes in comments like this one.
    from jcvi.apps.biomart import GlobusXMLParser

    p = OptionParser(phytozome.__doc__)
    p.add_option(
        "--version",
        default="12",
        choices=("9", "10", "11", "12", "12_unrestricted", "13"),
        help="Phytozome version",
    )
    p.add_option(
        "--assembly",
        default=False,
        action="store_true",
        help="Download assembly",
    )
    p.add_option(
        "--format",
        default=False,
        action="store_true",
        help="Format to CDS and BED for synteny inference",
    )
    p.set_downloader()
    opts, args = p.parse_args(args)

    downloader = opts.downloader
    # Local file that receives the directory listing for the chosen version.
    directory_listing = ".phytozome_directory_V{}.xml".format(opts.version)

    # Get directory listing
    base_url = "http://genome.jgi.doe.gov"
    dlist = "{}/ext-api/downloads/get-directory?organism=PhytozomeV{}".format(
        base_url, opts.version
    )

    # Make sure we have a valid cookies
    cookies = get_cookies()
    if cookies is None:
        logging.error("Error fetching cookies ... cleaning up")
        FileShredder([directory_listing])
        sys.exit(1)

    # Proceed to use the cookies and download the species list
    try:
        download(
            dlist,
            filename=directory_listing,
            cookies=cookies,
            downloader=downloader,
        )
        g = GlobusXMLParser(directory_listing)
    except BaseException:
        # Deliberately broad (same reach as the former bare ``except``):
        # any failure or interruption here removes the listing and the
        # cookies before exiting with status 1.
        logging.error("Error downloading directory listing ... cleaning up")
        FileShredder([directory_listing, cookies])
        sys.exit(1)

    genomes = g.get_genomes()
    valid_species = genomes.keys()
    species_tile = tile(valid_species)
    # Append the list of available species to the help text.
    p.set_usage("\n".join((phytozome.__doc__, species_tile)))

    if len(args) != 1:
        sys.exit(not p.print_help())

    (species,) = args
    # "all" expands to every species found in the directory listing.
    if species == "all":
        species = ",".join(valid_species)

    species = species.split(",")
    for s in species:
        res = download_species_phytozome(
            genomes,
            s,
            valid_species,
            base_url,
            cookies,
            assembly=opts.assembly,
            downloader=downloader,
        )
        if not res:
            logging.error("No files downloaded")
            # Nothing to post-process for this species. Without this the
            # code below would call .get() on a falsy result (AttributeError
            # on None) or pass missing paths on to the formatter.
            continue
        gff, fa = res.get("gff"), res.get("cds")
        if opts.format:
            format_bed_and_cds(s, gff, fa)