def main():
    """Launch the EcoData Retriever.

    With no command line arguments, or with ``gui`` as the first argument,
    the graphical application is started.  Otherwise the arguments are parsed
    with the module-level ``parser`` and the requested command is run:

    * ``help``     -- re-parses ``-h`` through the parser
    * ``update``   -- checks for updates and reloads the script list
    * ``citation`` -- prints the Retriever citation file, or the description
      of every dataset matching ``args.dataset``
    * ``gui``      -- launches the graphical application
    * ``new``      -- writes ``sample_script`` to ``args.filename``
    * ``ls`` (or no dataset given) -- lists dataset short names and tag groups
    * anything else -- downloads every dataset matching ``args.dataset``
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        # (the update check is non-graphical when the platform string
        # contains 'darwin')
        check_for_updates(graphical='darwin' not in platform.platform().lower())
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    # otherwise, parse them
    script_list = SCRIPT_LIST()
    args = parser.parse_args()

    if args.quiet:
        # Redirect all further output to the null device.  The handle is
        # deliberately left open for the rest of the process.
        sys.stdout = open(os.devnull, 'w')

    if args.command == 'help':
        parser.parse_args(['-h'])

    # 'compile' is only present on some parsed namespaces, hence hasattr.
    if hasattr(args, 'compile') and args.compile:
        script_list = SCRIPT_LIST(force_compile=True)

    if args.command == 'update':
        check_for_updates(graphical=False)
        # NOTE(review): the result is unused; the call is kept in case
        # SCRIPT_LIST() has side effects -- confirm.
        SCRIPT_LIST()
        return

    elif args.command == 'citation':
        if args.dataset is None:
            citation_path = os.path.join(os.path.split(__file__)[0],
                                         '../CITATION')
            print(citation_path)
            with open(citation_path) as citation_file:
                print(citation_file.read())
        else:
            for dataset in name_matches(script_list, args.dataset):
                print(dataset.description)
        return

    elif args.command == 'gui':
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    elif args.command == 'new':
        # 'with' guarantees the file is closed even if the write fails.
        with open(args.filename, 'w') as script_file:
            script_file.write(sample_script)
        return

    if args.command == 'ls' or args.dataset is None:
        import lscolumns

        # If scripts have never been downloaded there is nothing to list
        if not script_list:
            print("No scripts are currently available. Updating scripts now...")
            check_for_updates(graphical=False)
            print("\n\nScripts downloaded.\n")
            script_list = SCRIPT_LIST()

        all_scripts = set(script.shortname for script in script_list)
        # Each tag entry may hold several levels separated by '>'; every
        # level is listed as its own upper-cased group, plus "ALL".
        all_tags = set(tag.strip().upper()
                       for script in script_list
                       for tagset in script.tags
                       for tag in tagset.split('>'))
        all_tags.add("ALL")

        print("Available datasets (%s):" % len(all_scripts))
        lscolumns.printls(sorted(all_scripts, key=lambda s: s.lower()))
        print("Groups:")
        lscolumns.printls(sorted(all_tags))
        return

    engine = choose_engine(args.__dict__)
    # 'debug' is only present on some parsed namespaces.
    debug = bool(getattr(args, 'debug', False))

    scripts = name_matches(script_list, args.dataset)
    if not scripts:
        print("The dataset %s isn't currently available in the Retriever"
              % (args.dataset,))
        # The listing command is the 'ls' subcommand, not a '-ls' flag.
        print("Run 'retriever ls' to see a list of currently available datasets")
        return

    for dataset in scripts:
        print("=> Installing %s" % (dataset.name,))
        try:
            dataset.download(engine, debug=debug)
            dataset.engine.final_cleanup()
        except KeyboardInterrupt:
            # Ctrl-C abandons the current dataset and moves on to the next.
            pass
        except Exception as e:
            # Report the failure and keep going unless debugging.
            print(e)
            if debug:
                raise
    print("Done!")
def main():
    """Command line entry point for the treestore.

    Builds an ``argparse`` parser with the sub-commands ``add``, ``get``,
    ``rm``, ``ls``, ``names``, ``count``, ``query`` and ``annotate``, creates
    a ``Treestore`` from the connection keyword arguments, and dispatches to
    the matching ``Treestore`` method.  Results are written to standard
    output.

    If no password is supplied on the command line and ``kwargs`` does not
    already contain one, the user is prompted for it with ``getpass()``.
    """
    import argparse

    bp_formats = ' | '.join(bp._io.supported_formats)
    input_formats = bp_formats
    output_formats = '%s | ascii' % bp_formats

    parser = argparse.ArgumentParser()
    parser.add_argument('--version', action='version', version=__version__)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='write out SPARQL queries before executing')
    # NOTE(review): --store is parsed but never used below -- confirm intent.
    parser.add_argument('-s', '--store',
                        help='name of Redland store (default=virtuoso)')
    parser.add_argument('-d', '--dsn', help='ODBC DSN (default=Virtuoso)')
    parser.add_argument('-u', '--user', help='ODBC user (default=dba)')
    parser.add_argument('-p', '--password',
                        help='ODBC password (default=dba)')

    subparsers = parser.add_subparsers(help='sub-command help', dest='command')

    # treestore add: add trees to the database
    add_parser = subparsers.add_parser('add', help='add trees to treestore')
    add_parser.add_argument('file', help='tree file')
    add_parser.add_argument('uri', help='tree uri (default=file name)',
                            nargs='?', default=None)
    add_parser.add_argument('-f', '--format',
                            help='file format (%s)' % input_formats,
                            nargs='?', default='newick')
    add_parser.add_argument('--rooted', help='this is a rooted tree',
                            action='store_true')
    add_parser.add_argument('--taxonomy',
                            help="the URI of a taxonomy graph to label higher-order taxa",
                            nargs='?', default=None)
    add_parser.add_argument('--tax-root',
                            help="the name of the top-most taxonomic group in the tree, used to subset the taxonomy and avoid homonymy issues",
                            nargs='?', default=None)

    # treestore get: download an entire tree
    get_parser = subparsers.add_parser('get', help='retrieve trees from treestore')
    get_parser.add_argument('uri', help='tree uri')
    get_parser.add_argument('-f', '--format',
                            help='serialization format (%s) (default=newick)' % output_formats,
                            nargs='?', default='newick')

    # treestore rm: delete trees from the database
    rm_parser = subparsers.add_parser('rm', help='remove trees from treestore')
    rm_parser.add_argument('uri', help='tree uri')

    # treestore ls: list trees
    ls_parser = subparsers.add_parser('ls', help='list all trees in treestore')
    ls_parser.add_argument('contains', help='comma-delimited list of desired taxa',
                           nargs='?', default='')
    ls_parser.add_argument('--counts',
                           help="display the number of matched taxa next to each tree URI",
                           action='store_true')
    ls_parser.add_argument('-l',
                           help="list one per line; don't try to pretty-print",
                           action='store_true')
    ls_parser.add_argument('-f', help="show full URIs instead of just IDs",
                           action='store_true')
    ls_parser.add_argument('--taxonomy',
                           help="the URI of a taxonomy graph to enable synonymy lookup",
                           nargs='?', default=None)
    ls_parser.add_argument('--filter',
                           help="SPARQL graph pattern that returned trees must match",
                           nargs='?', default=None)

    # treestore names: get list of taxa contained in a tree
    names_parser = subparsers.add_parser('names',
                                         help='return a comma-separated list of all taxa names')
    names_parser.add_argument('uri', help='tree uri (default=all trees)',
                              nargs='?', default=None)
    names_parser.add_argument('-f', '--format',
                              help='file format (json, csv, xml) (default=csv)',
                              default='csv')

    # treestore count: count the number of labeled nodes
    count_parser = subparsers.add_parser('count',
                                         help='returns the number of labeled nodes in a tree')
    count_parser.add_argument('uri', help='tree uri (default=all trees)',
                              nargs='?', default=None)

    # treestore query: create a subtree from a list of taxa
    query_parser = subparsers.add_parser('query',
                                         help='retrieve the best subtree containing a given set of taxa')
    query_parser.add_argument('contains', help='comma-delimited list of desired taxa',
                              nargs='?')
    query_parser.add_argument('uri', help='tree uri (default=select automatically)',
                              nargs='?', default=None)
    query_parser.add_argument('-f', '--format',
                              help='serialization format (%s) (default=newick)' % output_formats,
                              nargs='?', default='newick')
    query_parser.add_argument('--complete',
                              help="return complete subtree from MRCA; don't prune other taxa from the resulting tree",
                              action='store_true')
    query_parser.add_argument('--taxonomy',
                              help="the URI of a taxonomy graph to enable synonymy lookup",
                              nargs='?', default=None)
    query_parser.add_argument('--filter',
                              help="SPARQL graph pattern that returned trees must match",
                              nargs='?', default=None)

    # treestore annotate: add metadata annotations to tree
    ann_parser = subparsers.add_parser('annotate',
                                       help='annotate tree with triples from RDF file')
    ann_parser.add_argument('uri', help='tree uri', default=None)
    ann_parser.add_argument('--file', help='annotation file')
    ann_parser.add_argument('--text', help='annotation, in turtle format',
                            default=None)
    ann_parser.add_argument('--doi', help='tree source DOI', default=None)

    args = parser.parse_args()

    # 'contains' is an optional positional with no default, so it is None
    # when omitted.  Fail with a usage error here instead of crashing on
    # None.split() later, and before any password prompt or connection.
    if args.command == 'query' and args.contains is None:
        query_parser.error('a comma-delimited list of desired taxa is required')

    # NOTE(review): kwargs is not defined in this function; it is presumably
    # a module-level dict of connection defaults -- confirm.
    if args.dsn:
        kwargs['dsn'] = args.dsn
    if args.user:
        kwargs['user'] = args.user
    if args.password:
        kwargs['password'] = args.password
    elif 'password' not in kwargs:
        # Store the prompted password so it is actually passed to Treestore.
        kwargs['password'] = getpass()
    kwargs['verbose'] = args.verbose

    treestore = Treestore(**kwargs)

    if args.command == 'add':
        # parse a tree and add it to the treestore
        treestore.add_trees(args.file, args.format, args.uri,
                            rooted=args.rooted, taxonomy=args.taxonomy,
                            tax_root=args.tax_root)

    elif args.command == 'get':
        # get a tree, serialize in specified format, and output to stdout
        treestore.serialize_trees(args.uri, args.format, handle=sys.stdout)

    elif args.command == 'rm':
        # remove a certain tree from the treestore
        treestore.remove_trees(args.uri)

    elif args.command == 'ls':
        # list all trees in the treestore or trees containing a list of taxa
        contains = args.contains
        if contains:
            contains = set(s.strip() for s in contains.split(','))
            trees = list(treestore.list_trees_containing_taxa(
                contains=contains, taxonomy=args.taxonomy,
                show_counts=args.counts, filter=args.filter))
            if args.counts:
                # each result is formatted as "<first> (<second>)"
                trees = ['%s (%s)' % tree for tree in trees]
            else:
                trees = [str(x) for x in trees]
        else:
            trees = list(treestore.list_trees(filter=args.filter))

        if not trees:
            sys.exit()

        if not args.f:
            trees = [treestore.id_from_uri(x) for x in trees]

        if args.l:
            print('\n'.join(trees))
        else:
            import lscolumns
            lscolumns.printls(trees)

    elif args.command == 'names':
        print(treestore.get_names(tree_uri=args.uri, format=args.format))

    elif args.command == 'count':
        print(len(list(treestore.get_names(tree_uri=args.uri, format=None))))

    elif args.command == 'query':
        contains = set(s.strip() for s in args.contains.split(','))
        treestore.get_subtree(
            contains=contains, tree_uri=args.uri,
            format=args.format,
            prune=not args.complete,
            taxonomy=treestore.uri_from_id(args.taxonomy) if args.taxonomy else None,
            filter=args.filter,
            handle=sys.stdout,
        )

    elif args.command == 'annotate':
        treestore.annotate(args.uri, annotations=args.text,
                           annotation_file=args.file, doi=args.doi)
def main(): import argparse bp_formats = ' | '.join(bp._io.supported_formats) input_formats = bp_formats output_formats = '%s | ascii' % bp_formats parser = argparse.ArgumentParser() parser.add_argument('-v', '--version', action='version', version=__version__) parser.add_argument('-s', '--store', help='name of Redland store (default=virtuoso)') parser.add_argument('-d', '--dsn', help='ODBC DSN (default=Virtuoso)') parser.add_argument('-u', '--user', help='ODBC user (default=dba)') parser.add_argument('-p', '--password', help='ODBC password (default=dba)') subparsers = parser.add_subparsers(help='sub-command help', dest='command') add_parser = subparsers.add_parser('add', help='add trees to treestore') add_parser.add_argument('file', help='tree file') add_parser.add_argument('format', help='file format (%s)' % input_formats) add_parser.add_argument('uri', help='tree uri (default=file name)', nargs='?', default=None) add_parser.add_argument('--bulk', help='use the virtuoso bulk loader', action='store_true') add_parser.add_argument('--puid', help='create a pseudo-unique ID for the tree', action='store_true') add_parser.add_argument('--rooted', help='this is a rooted tree', action='store_true') get_parser = subparsers.add_parser('get', help='retrieve trees from treestore') get_parser.add_argument('uri', help='tree uri') get_parser.add_argument('format', help='serialization format (%s) (default=newick)' % output_formats, nargs='?', default='newick') rm_parser = subparsers.add_parser('rm', help='remove trees from treestore') rm_parser.add_argument('uri', help='tree uri') ls_parser = subparsers.add_parser('ls', help='list all trees in treestore') ls_parser.add_argument('contains', help='comma-delimited list of species that must be contained in each returned tree (default=none)', nargs='?', default='') ls_parser.add_argument('--all', help='only return trees that contain all given species', action='store_true') names_parser = subparsers.add_parser('names', help='return a 
comma-separated list of all taxa names') names_parser.add_argument('uri', help='tree uri (default=all trees)', nargs='?', default=None) names_parser.add_argument('-f', '--format', help='file format (json, csv, xml) (default=csv)', default='csv') count_parser = subparsers.add_parser('count', help='returns the number of labelled nodes in a tree') count_parser.add_argument('uri', help='tree uri (default=all trees)', nargs='?', default=None) query_parser = subparsers.add_parser('query', help='retrieve the best subtree containing a given set of taxa') query_parser.add_argument('contains', help='comma-delimited list of species that must be contained in each returned tree', nargs='?') query_parser.add_argument('format', help='serialization format (%s) (default=newick)' % output_formats, nargs='?', default='newick') query_parser.add_argument('uri', help='tree uri (default=select automatically)', nargs='?', default=None) query_parser.add_argument('--all', help='only return trees that contain all given species', action='store_true') query_parser.add_argument('--complete', help="return complete subtree from MRCA; don't prune other taxa from the resulting tree", action='store_true') ann_parser = subparsers.add_parser('annotate', help='annotate tree with triples from RDF file') ann_parser.add_argument('file', help='annotation file') ann_parser.add_argument('format', help='annotation file format (default=ntriples)') ann_parser.add_argument('uri', help='tree uri', default=None) args = parser.parse_args() kwargs = {} if args.store: kwargs['storage_name'] = args.store if args.dsn: kwargs['dsn'] = args.dsn if args.user: kwargs['user'] = args.user if args.password: kwargs['password'] = args.password treestore = Treestore(**kwargs) if args.command == 'add': # parse a tree and add it to the treestore treestore.add_trees(args.file, args.format, args.uri, bulk_loader=args.bulk, puid=args.puid, rooted=args.rooted) elif args.command == 'get': # get a tree, serialize in specified format, 
and output to stdout print treestore.serialize_trees(args.uri, args.format), elif args.command == 'rm': # remove a certain tree from the treestore treestore.remove_trees(args.uri) elif args.command == 'ls': # list all trees in the treestore or trees containing a list of taxa contains = args.contains if contains: contains = set([s.strip() for s in contains.split(',')]) trees = [r for r in treestore.list_trees_containing_taxa( contains=contains, match_all=args.all)] else: trees = treestore.list_trees() if not trees: exit() import lscolumns lscolumns.printls(trees) elif args.command == 'names': print treestore.get_names(tree_uri=args.uri, format=args.format) elif args.command == 'count': print len([r for r in treestore.get_names(tree_uri=args.uri, format=None)]) elif args.command == 'query': contains = set([s.strip() for s in args.contains.split(',')]) print treestore.get_subtree(contains=contains, tree_uri=args.uri, match_all=args.all, format=args.format, prune=not args.complete), elif args.command == 'annotate': annotate(args.uri, args.file, treestore, format=args.format)
def main():
    """Launch the EcoData Retriever.

    With no command line arguments, or with ``gui`` as the first argument,
    the graphical application is started.  Otherwise the arguments are parsed
    with the module-level ``parser`` and the requested command is run:

    * ``help``     -- re-parses ``-h`` through the parser
    * ``update``   -- checks for updates and reloads the script list
    * ``citation`` -- prints the Retriever citation file, or the citation and
      description of every dataset matching ``args.dataset``
    * ``gui``      -- launches the graphical application
    * ``new``      -- writes ``sample_script`` to ``args.filename``
    * ``reset``    -- calls ``reset_retriever(args.scope)``
    * ``ls`` (or no dataset given) -- lists the available datasets; when
      ``-l`` terms are given, only datasets whose name, short name and tags
      contain every term (case-insensitively) are shown, numbered
    * anything else -- downloads every dataset matching ``args.dataset``
    """
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == 'gui'):
        # if no command line args are passed, launch GUI
        # (the update check is non-graphical on darwin)
        check_for_updates(graphical=current_platform != 'darwin')
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    # otherwise, parse them
    script_list = SCRIPT_LIST()
    args = parser.parse_args()

    if args.quiet:
        # Redirect all further output to the null device.  The handle is
        # deliberately left open for the rest of the process.
        sys.stdout = open(os.devnull, 'w')

    if args.command == 'help':
        parser.parse_args(['-h'])

    # 'compile' is only present on some parsed namespaces, hence hasattr.
    if hasattr(args, 'compile') and args.compile:
        script_list = SCRIPT_LIST(force_compile=True)

    if args.command == 'update':
        check_for_updates(graphical=False)
        # NOTE(review): the result is unused; the call is kept in case
        # SCRIPT_LIST() has side effects -- confirm.
        SCRIPT_LIST()
        return

    elif args.command == 'citation':
        if args.dataset is None:
            citation_path = os.path.join(os.path.split(__file__)[0],
                                         '../CITATION')
            print("\nCitation for retriever:\n")
            with open(citation_path) as citation_file:
                print(citation_file.read())
        else:
            for dataset in name_matches(script_list, args.dataset):
                print("\nCitation: {}".format(dataset.citation))
                print("Description: {}\n".format(dataset.description))
        return

    elif args.command == 'gui':
        lists = get_lists()
        from retriever.app.main import launch_app
        launch_app(lists)
        return

    elif args.command == 'new':
        # 'with' guarantees the file is closed even if the write fails.
        with open(args.filename, 'w') as script_file:
            script_file.write(sample_script)
        return

    elif args.command == 'reset':
        reset_retriever(args.scope)
        return

    if args.command == 'ls' or args.dataset is None:
        # If scripts have never been downloaded there is nothing to list
        if not script_list:
            print("No scripts are currently available. Updating scripts now...")
            check_for_updates(graphical=False)
            print("\n\nScripts downloaded.\n")
            script_list = SCRIPT_LIST()

        # This branch is also reached by commands other than 'ls' when no
        # dataset was given; read 'l' defensively, like 'compile' and
        # 'debug', in case the parsed namespace does not define it.
        search_terms = getattr(args, 'l', None)

        all_scripts = []
        for script in script_list:
            if not script.name:
                continue
            if search_terms is None:
                # plain listing: short names only
                all_scripts.append(script.shortname)
                continue
            # verbose listing: name, short name and tags, kept only if every
            # search term occurs somewhere in that text (case-insensitive)
            script_name = (script.name + "\nShortname: "
                           + script.shortname + "\n")
            if script.tags:
                script_name += "Tags: " + str(list(script.tags)) + "\n"
            searchable = script_name.lower()
            if all(term.lower() in searchable for term in search_terms):
                all_scripts.append(script_name)

        all_scripts = sorted(all_scripts, key=lambda s: s.lower())

        print("Available datasets : {}\n".format(len(all_scripts)))

        if search_terms is None:
            import lscolumns
            lscolumns.printls(all_scripts)
        else:
            for count, script_name in enumerate(all_scripts, 1):
                print("%d. %s" % (count, script_name))
        return

    engine = choose_engine(args.__dict__)
    # 'debug' is only present on some parsed namespaces.
    debug = bool(getattr(args, 'debug', False))

    scripts = name_matches(script_list, args.dataset)
    if not scripts:
        print("The dataset {} isn't currently available in the Retriever".format(args.dataset))
        print("Run 'retriever ls' to see a list of currently available datasets")
        return

    for dataset in scripts:
        print("=> Installing %s" % (dataset.name,))
        try:
            dataset.download(engine, debug=debug)
            dataset.engine.final_cleanup()
        except KeyboardInterrupt:
            # Ctrl-C abandons the current dataset and moves on to the next.
            pass
        except Exception as e:
            # Report the failure and keep going unless debugging.
            print(e)
            if debug:
                raise
    print("Done!")