def test_filesystem(tmp_path, database):
    """Walk the Encyclopedia client through a full add / inspect / clear cycle.

    A fresh config file is generated in a "software" folder under tmp_path,
    the client attributes are checked against it, one repository is added,
    the summary and analysis entry points are called, and finally the
    database is emptied again.

    Arguments:
     - tmp_path: base temporary directory (used via str(tmp_path)).
     - database: database name handed to Encyclopedia (presumably a fixture
       defined outside this block).
    """
    from rse.main import Encyclopedia

    software_dir = os.path.join(str(tmp_path), "software")
    os.mkdir(software_dir)
    ini_path = os.path.join(software_dir, "rse.ini")

    # The config file has not been generated yet: expect a SystemExit
    with pytest.raises(SystemExit):
        Encyclopedia(config_file=ini_path)

    enc = Encyclopedia(config_file=ini_path, generate=True, database=database)

    # The client should point at the config we just generated
    assert enc.config.configfile == ini_path
    assert os.path.exists(software_dir)
    assert enc.config_dir == software_dir
    assert enc.config.configfile == os.path.join(enc.config_dir, "rse.ini")
    assert enc.database == database
    assert enc.db.database == database

    # Only the filesystem database exposes a data_base folder to compare
    if database == "filesystem":
        assert enc.db.data_base == os.path.join(software_dir, "database")

    # Nothing has been added yet, so the listing is empty
    assert not enc.list()

    # Add a single repository
    uid = "github.com/singularityhub/sregistry"
    added = enc.add(uid)
    assert len(enc.list()) == 1

    # With no identifier, get() should hand back the most recent repo
    latest = enc.get()
    assert latest.uid == added.uid

    # Summary and analysis entry points, globally and for the one repo
    enc.summary()
    enc.summary(uid)
    enc.analyze(uid)
    enc.analyze_bulk()

    # Remove the specific repo without prompting, then clear everything
    enc.clear(added.uid, noprompt=True)
    assert len(enc.list()) == 0
    enc.clear(noprompt=True)
    assert not enc.list()

    # Taxonomy and criteria listings
    enc.list_taxonomy()
    enc.list_criteria()
def start(
    port=5000,
    debug=True,
    client=None,
    host=None,
    level="DEBUG",
    disable_annotate=False,
):
    """Run the dashboard server, attaching an Encyclopedia client to the app.

    Arguments:
     - port: port handed to socketio.run.
     - debug: debug flag handed to socketio.run.
     - client: an Encyclopedia client; when not provided (or falsy) a
       default Encyclopedia() is created.
     - host: hostname to serve on; falls back to RSE_HOSTNAME when not set.
     - level: name of a logging level attribute (e.g. "DEBUG") looked up on
       the logging module and applied to the app logger.
     - disable_annotate: stored on the app as app.disable_annotate.
    """
    host = host or RSE_HOSTNAME
    bot.info(f"Research Software Encyclopedia: running on http://{host}:{port}")

    # Fall back to a default client when the caller did not supply one
    if not client:
        from rse.main import Encyclopedia

        client = Encyclopedia()

    # Send the app log to a file that lives next to the client's config
    log_path = os.path.join(client.config_dir, "dashboard.log")
    handler = logging.FileHandler(log_path)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    app.logger.addHandler(handler)
    app.logger.setLevel(getattr(logging, level))

    # Expose the client and the annotate switch on the app object
    app.client = client
    app.disable_annotate = disable_annotate

    socketio.run(app, port=port, debug=debug, host=host)
def ipython(args):
    """Open an embedded IPython shell after creating an Encyclopedia client.

    Arguments:
     - args: parsed arguments; only args.config_file is read here.
    """
    # NOTE(review): the local is deliberately still called "client" --
    # embed() presumably exposes this function's locals to the shell.
    client = Encyclopedia(config_file=args.config_file)

    # Keeps the otherwise unreferenced client "used"
    assert client

    from IPython import embed

    embed()
def summary(args, extra):
    """Print summary metrics from the client as indented json.

    The metrics come from client.summary(args.repo). When args.metric_type
    is "taxonomy", "criteria" or "users", only the fields belonging to that
    group are kept; any other value leaves the metrics untouched. Nothing is
    printed when the metrics are empty.

    Arguments:
     - args: parsed arguments (config_file, database, repo, metric_type).
     - extra: additional arguments, unused here.
    """
    client = Encyclopedia(config_file=args.config_file, database=args.database)
    metrics = client.summary(args.repo)

    # Fields to export for each supported metric type
    fields_by_type = {
        "taxonomy": ["taxonomy", "taxonomy-count", "repos"],
        "criteria": ["criteria", "criteria-count", "repos"],
        "users": ["users-count", "users", "repos"],
    }

    selected = fields_by_type.get(args.metric_type)
    if selected is not None:
        metrics = {field: metrics[field] for field in selected}

    # Print metrics as json
    if metrics:
        print(json.dumps(metrics, indent=4))
def test_parsers_filesystem(tmp_path, database):
    """Check that each known uid maps to the expected parser and file layout.

    Arguments:
     - tmp_path: base temporary directory (used via str(tmp_path)).
     - database: accepted but not read in this test.
    """
    from rse.main import Encyclopedia

    software_dir = os.path.join(str(tmp_path), "software")
    os.mkdir(software_dir)
    ini_path = os.path.join(software_dir, "rse.ini")

    # NOTE(review): the literal string "database" is passed here rather than
    # the database argument -- confirm this is intended.
    enc = Encyclopedia(config_file=ini_path, generate=True, database="database")

    # Pairs of (expected parser name, unique resource identifier)
    expected = [["github", "github.com/singularityhub/sregistry"]]

    for parser_name, uid in expected:
        repo = enc.get_or_create(uid)
        assert repo.parser.name == parser_name

        # Metadata is expected under <config_dir>/database/<uid>/metadata.json
        metadata_path = os.path.join(
            enc.config_dir, "database", repo.uid, "metadata.json"
        )
        assert repo.filename == metadata_path
        assert repo.summary()

        # The loaded content should carry these upper level keys
        content = repo.load()
        for key in ["parser", "uid", "url", "data"]:
            assert key in content

        # The export should additionally include a timestamp
        exported = repo.export()
        assert "timestamp" in exported
def python(args):
    """Start a standard-library interactive console with a client available.

    The console namespace contains a single name, "client", bound to an
    Encyclopedia created from args.config_file.

    Arguments:
     - args: parsed arguments; only args.config_file is read here.
    """
    import code

    client = Encyclopedia(config_file=args.config_file)
    assert client

    namespace = {"client": client}
    code.interact(local=namespace)
def main(args, extra):
    """Update a single repository, or several listed in a file.

    Arguments:
     - args: parsed arguments (config_file, database, file, uid, rewrite).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    # No file given: update just the one identifier
    if not args.file:
        enc.update(args.uid, rewrite=args.rewrite)
        return

    # Otherwise the file drives a bulk update
    enc.bulk_update(args.file, rewrite=args.rewrite)
def main(args, extra):
    """Attach a key/value label to a repository.

    args.values must unpack into exactly three items: the repository uid,
    the label key, and the label value. Any exception raised while labeling
    is printed rather than propagated.

    Arguments:
     - args: parsed arguments (config_file, database, values, force).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    # Repository identifier first, then the label key and its value
    repo_uid, label_key, label_value = args.values

    try:
        enc.label(uid=repo_uid, key=label_key, value=label_value, force=args.force)
    except Exception as exc:
        print(exc)
def main(args, extra):
    """Report whether a unique resource identifier exists in the encyclopedia.

    Arguments:
     - args: parsed arguments (config_file, uid).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file)

    # An identifier is required, there is nothing to look up without one
    if not args.uid:
        bot.error("Please provide a unique resource identifier to search for.")
        return

    message = (
        f"{args.uid} exists."
        if enc.exists(args.uid)
        else f"{args.uid} does not exist."
    )
    print(message)
def main(args, extra):
    """Print topics, or the repositories that match the given topics.

    With args.search set, the repositories returned by repos_by_topics are
    printed; otherwise the topics matching args.pattern are printed. Either
    way the output is one entry per line.

    Arguments:
     - args: parsed arguments (config_file, database, search, pattern).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    if args.search:
        # The user asked for repos carrying one or more topics
        lines = enc.repos_by_topics(topics=args.search)
    else:
        # Default: list the topics themselves
        lines = enc.topics(pattern=args.pattern)

    print("\n".join(lines))
def create(self, database=None, config_file=None):
    """Add every scraped result that the encyclopedia does not hold yet.

    Each entry of self.results is passed to get_parser, and the uid of the
    resulting parser is added to the client unless it already exists.

    Arguments:
     - database: database name passed through to Encyclopedia.
     - config_file: config file path passed through to Encyclopedia.
    """
    from rse.main import Encyclopedia

    client = Encyclopedia(config_file=config_file, database=database)

    for repo_id in self.results:
        parsed = get_parser(repo_id)

        # Skip anything already present
        if client.exists(parsed.uid):
            continue
        client.add(parsed.uid)
def main(args, extra):
    """Generate a new encyclopedia config file inside an existing path.

    Arguments:
     - args: parsed arguments (path, config_file, database). A path of "."
       is replaced by the current working directory.
     - extra: additional arguments, unused here.

    Raises DirectoryNotFoundError if the resolved path does not exist.
    """
    # "." is shorthand for the present working directory
    base = os.getcwd() if args.path == "." else args.path

    # The base must already exist
    if not os.path.exists(base):
        raise DirectoryNotFoundError(base)

    # Generate the software repository in the base
    target = os.path.join(base, args.config_file)
    Encyclopedia(config_file=target, generate=True, database=args.database)
def main(args, extra):
    """Add one software identifier, or many from a file, to the encyclopedia.

    Exits with a message when neither args.uid nor args.file is provided.
    When both are given, the file takes precedence.

    Arguments:
     - args: parsed arguments (config_file, database, uid, file).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    # At least one of uid / file is required
    if not (args.uid or args.file):
        sys.exit("Please provide a software identifier or file to add.")

    # A file means a bulk add
    if args.file:
        enc.bulk_add(args.file)
        return

    enc.add(args.uid)
def create(self, database=None, config_file=None):
    """Add scraped results to the encyclopedia, labeling new ones with a doi.

    For every entry of self.results the uid is whatever follows the last
    "//" of the entry's "url". Entries whose parsed uid is not yet known to
    the client are added and, when the entry has a truthy "doi", labeled
    with it.

    Arguments:
     - database: database name passed through to Encyclopedia.
     - config_file: config file path passed through to Encyclopedia.
    """
    from rse.main import Encyclopedia

    client = Encyclopedia(config_file=config_file, database=database)

    for result in self.results:
        # Drop the scheme (everything up to the last "//") from the url
        uid = result["url"].split("//")[-1]
        repo = get_parser(uid)

        # Results that already exist are left alone
        if client.exists(repo.uid):
            continue

        client.add(repo.uid)
        doi = result.get("doi")
        if doi:
            client.label(repo.uid, key="doi", value=doi)
def main(args, extra):
    """Search the encyclopedia by query string, taxonomy and/or criteria.

    Exits with a message when all three of query, taxonomy and criteria are
    empty. Each group of results is shown as an info line with the group
    key, followed by a table of its listing and a blank line.

    Arguments:
     - args: parsed arguments (config_file, database, query, taxonomy,
       criteria); args.query is joined with spaces into one string.
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    query = " ".join(args.query).strip()

    # Taxonomy and criteria filters default to empty lists
    taxonomy = args.taxonomy or []
    criteria = args.criteria or []

    if not (query or taxonomy or criteria):
        sys.exit("Please provide a query to search for.")

    results = enc.search(query, taxonomy=taxonomy, criteria=criteria)
    if not results:
        return

    for key, listing in results.items():
        bot.info(key)
        bot.table(listing)
        bot.newline()
def main(args, extra):
    """Print the stored content of a repository as indented json.

    A NoReposError or RepoNotFoundError raised while fetching or loading the
    repository is reported through bot.error instead of propagating.

    Arguments:
     - args: parsed arguments (config_file, database, uid).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    try:
        content = enc.get(args.uid).load()
        print(json.dumps(content, indent=4))

    # The database has no repositories at all
    except NoReposError:
        bot.error(
            "There are no software repositories in the database! Use rse add to add some."
        )

    # The database has repositories, but not this one
    except RepoNotFoundError:
        bot.error(f"{args.uid} does not exist in the database. Use rse add to add it.")
def main(args, extra):
    """Start the dashboard server with an Encyclopedia client attached.

    Exits with an installation hint when the server module cannot be
    imported. Errors raised by the running server itself are not swallowed.

    Arguments:
     - args: parsed arguments (config_file, database, port, debug,
       log_level, disable_annotate).
     - extra: additional arguments, unused here.
    """
    enc = Encyclopedia(config_file=args.config_file, database=args.database)

    # Only the import is guarded: a failure here means the optional app
    # dependencies are missing. Wrapping start() as well (with a bare except)
    # would report every runtime error or Ctrl-C as a missing install.
    try:
        from rse.app.server import start
    except ImportError:
        sys.exit(
            "You must 'pip install rse[app]' 'pip install rse[all]' to use the dashboard."
        )

    # Pass the encyclopedia object to start a server
    start(
        port=args.port,
        client=enc,
        debug=args.debug,
        level=args.log_level,
        disable_annotate=args.disable_annotate,
    )
def create(self, database=None, config_file=None):
    """Add scraped results to the encyclopedia, skipping unsupported ones.

    For every entry of self.results the uid is whatever follows the last
    "//" of the entry's "url". If get_parser raises NotImplementedError for
    that uid, a warning is issued and the entry is skipped. Entries not yet
    known to the client are added and labeled with the entry's "doi" value
    (which is None when the entry has no "doi" key).

    Arguments:
     - database: database name passed through to Encyclopedia.
     - config_file: config file path passed through to Encyclopedia.
    """
    from rse.main import Encyclopedia

    client = Encyclopedia(config_file=config_file, database=database)

    for result in self.results:
        # Drop the scheme (everything up to the last "//") from the url
        uid = result["url"].split("//")[-1]

        # No parser for this kind of repository: warn and move on
        try:
            repo = get_parser(uid)
        except NotImplementedError as exc:
            bot.warning(exc)
            continue

        # Results that already exist are left alone
        if client.exists(repo.uid):
            continue

        client.add(repo.uid)
        client.label(repo.uid, key="doi", value=result.get("doi"))
def main(args, extra):
    """Export the encyclopedia as a text list of repos or a static web site.

    Supported values of args.export_type:
     - "repos-txt": write the first element of every client.list() entry
       (presumably the unique id) to args.path, one per line.
     - "static-web": start the web server in a separate process, run the
       static export against it, and stop the server afterwards.

    Exits with status 1 when args.path already exists and args.force is not
    set, so that an existing export is never overwritten by accident.

    Arguments:
     - args: parsed arguments (config_file, database, path, force,
       export_type, port, debug, host, log_level).
     - extra: additional arguments, unused here.
    """
    import sys

    client = Encyclopedia(config_file=args.config_file, database=args.database)

    # Refuse to overwrite an existing export unless forced. Logging alone is
    # not enough: without stopping here the export below would still run.
    if os.path.exists(args.path) and not args.force:
        bot.error(f"{args.path} already exists, use --force to overwrite it.")
        sys.exit(1)

    # Export a list of repos
    if args.export_type == "repos-txt":

        # We just want the unique id, the first result
        repos = [x[0] for x in client.list()]
        write_file(args.path, "\n".join(repos))
        bot.info(f"Wrote {len(repos)} to {args.path}")

    # Static web export from flask to a directory
    elif args.export_type == "static-web":
        from rse.app.server import start
        from rse.app.export import export_web_static

        # Start the webserver on a separate process
        server = Process(
            target=start,
            # port, debug, client, host, log-level, disable_annotate
            args=(args.port, args.debug, client, args.host, args.log_level, True),
        )
        server.start()

        # Do the export, and make sure the server is stopped even when the
        # export fails, otherwise the child process is left running.
        try:
            export_web_static(
                export_dir=args.path,
                base_url="http://%s:%s" % (args.host, args.port),
                force=args.force,
                client=client,
            )
        finally:
            server.kill()
            server.join()
def analyze(args, extra):
    """Show criteria and taxonomy results for one repository as tables.

    The analysis itself comes from client.analyze; this function only looks
    up the display name of every criteria / taxonomy uid in the result and
    prints two tables. For a custom set, the user should interact with the
    client directly.

    Arguments:
     - args: parsed arguments (config_file, database, repo, cthresh,
       tthresh).
     - extra: additional arguments, unused here.
    """
    client = Encyclopedia(config_file=args.config_file, database=args.database)
    result = client.analyze(
        repo=args.repo, cthresh=args.cthresh, tthresh=args.tthresh
    )

    # Index criteria and taxonomy entries by uid so names can be looked up
    criteria_by_uid = {entry["uid"]: entry for entry in client.list_criteria()}
    taxonomy_by_uid = {entry["uid"]: entry for entry in client.list_taxonomy()}

    # One row per criteria: the response followed by the criteria name
    criteria_rows = []
    for uid, response in result["criteria"].items():
        criteria_rows.append([response, criteria_by_uid[uid]["name"]])

    # One row per taxonomy item: the count (as text) followed by the name
    taxonomy_rows = []
    for uid, count in result["taxonomy"].items():
        taxonomy_rows.append([str(count), taxonomy_by_uid[uid]["name"]])

    bot.info(f"Summary for {result['repo']}")
    bot.info("\nCriteria")
    bot.table(criteria_rows)
    bot.info("\nTaxonomy")
    bot.table(taxonomy_rows)
def bpython(args):
    """Start an embedded bpython shell with a client available.

    The shell namespace contains a single name, "client", bound to an
    Encyclopedia created from args.config_file.

    Arguments:
     - args: parsed arguments; only args.config_file is read here.
    """
    # Imported locally; inside this function the name refers to the module
    import bpython

    client = Encyclopedia(config_file=args.config_file)
    assert client

    namespace = {"client": client}
    bpython.embed(locals_=namespace)