def greet(): """Greet someone.""" # The greeting greeting: Option = default("Hello") # The name to greet name: Option = default("you") return f"{greeting}, {name}!"
def command_search():
    """Query the Microsoft Academic database."""
    # File containing the collection
    # [alias: -c]
    collection: Option & PapersFile = default(None)

    # Researchers file (JSON)
    # [alias: -r]
    researchers: Option & ResearchersFile = default(None)

    # Command to run on every paper
    command: Option = default(None)

    # Display long form for each paper
    long: Option & bool = default(False)

    papers = search(collection=collection, researchers=researchers)

    # The command set depends on whether a collection file was supplied.
    has_collection = collection is not None
    if has_collection:
        sch = search_commands_with_coll
    else:
        sch = search_commands

    for paper in papers:
        if long:
            formatter = Paper.format_term_long
        else:
            formatter = Paper.format_term
        instruction = sch.process_paper(
            paper,
            command=command,
            collection=collection,
            formatter=formatter,
        )
        # Only an explicit False stops the loop; other falsy values do not.
        if instruction is False:
            break

    # Persist the collection once, after the whole pass.
    if has_collection:
        collection.save()
def command_search():
    """Query the Microsoft Academic database."""
    # File containing the collection
    # [alias: -c]
    collection: Arg & PapersFile = default(None)

    # Researchers file (JSON)
    # [alias: -r]
    researchers: Arg & ResearchersFile = default(None)

    # Command to run on every paper
    command: Arg = default(None)

    papers = search(collection=collection, researchers=researchers)

    # Without a collection file the plain command set is used.
    if collection is None:
        sch = search_commands
    else:
        sch = search_commands_with_coll

    for paper in papers:
        instruction = sch.process_paper(
            paper,
            command=command,
            collection=collection,
        )
        # Only an explicit False stops the loop.
        if instruction is False:
            break

    if collection is None:
        return
    collection.save()
def guess():
    """Play one round of a number guessing game on stdin/stdout.

    Returns True as soon as the target is guessed and False once the last
    allowed try has been used. If `maxtries` is not positive the loop never
    runs and the function falls through, returning None.
    """
    # Minimal possible number
    minimum: Option & int = default(0)

    # Maximal possible number
    maximum: Option & int = default(100)

    # [group: whimsy]
    # Maximal number of tries
    maxtries: Option & int = default(10)

    # Force the number to guess (defaults to random)
    target: Option & int = default(random.randint(minimum, maximum))

    assert minimum <= target <= maximum

    print(f"Please guess a number between {minimum} and {maximum}")

    last_try = maxtries - 1
    for i in range(maxtries):
        guess = float(input("? "))

        if guess == target:
            print("Yes! :D")
            return True

        # A wrong answer on the final try ends the round without a hint.
        if i == last_try:
            print("You failed :(")
            return False

        if guess < target:
            print("Too low. Guess again.")
        elif guess > target:
            print("Too high. Guess again.")
def init_torch(
    # Seed to use for random numbers
    seed: Argument & int = default(1234),
    # Use CUDA for this model
    cuda: Argument & bool = default(None),
    # Number of threads for PyTorch
    workers: Argument & int = default(None),
):
    """Seed NumPy and PyTorch, set the thread count and describe the device.

    Returns an `NS` with the fields `device`, `cuda`, `sync`, `workers` and
    `seed`. `sync` is `torch.cuda.synchronize` when CUDA is used, else None.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)
    # A falsy `workers` (None or 0) falls back to a single thread.
    torch.set_num_threads(workers if workers else 1)

    # When not forced either way, use CUDA whenever it is available.
    if cuda is None:
        cuda = torch.cuda.is_available()

    if cuda:
        torch.cuda.manual_seed_all(seed)
        device_name, sync = "cuda", torch.cuda.synchronize
    else:
        device_name, sync = "cpu", None

    return NS(
        device=torch.device(device_name),
        cuda=cuda,
        sync=sync,
        workers=workers,
        seed=seed,
    )
def command_run(subargv):
    """Run a benchmark.

    Positional argument must point to a Python function in a module,
    using the entry point syntax.
    """
    # [positional]
    # Name of the experiment to run (e.g. milarun.models.polynome:main)
    function: Argument

    # File/directory where to put the results. Assumed to be a directory
    # unless the name ends in .json
    # [metavar: PATH]
    # [alias: -o]
    out: Argument = default(None)
    out = out and os.path.realpath(os.path.expanduser(out))

    # Name of the experiment (optional)
    experiment_name: Argument = default(None)

    # ID of the job (optional)
    job_id: Argument = default(None)

    # Extra information (optional)
    extra: Argument = default("{}")
    extra = json.loads(extra)

    # Root directory for datasets (default: $MILARUN_DATAROOT)
    # [metavar: PATH]
    # [alias: -d]
    dataroot: Argument = default(os.getenv("MILARUN_DATAROOT"))
    if not dataroot:
        # One string instead of two print arguments: the first fragment
        # already ends in ", ", so print's separator doubled the space.
        print(
            "milarun: error: no dataroot specified, "
            "please use --dataroot/-d or set $MILARUN_DATAROOT",
            file=sys.stderr,
        )
        sys.exit(1)
    dataroot = os.path.realpath(os.path.expanduser(dataroot))

    run = resolve(function)

    # The experiment name falls back to the entry point string.
    experiment = Experiment(
        name=experiment_name or function,
        job_id=job_id,
        dataroot=dataroot,
        outdir=out,
    )
    experiment.set_fields(extra)
    # Record how the benchmark was invoked alongside its results.
    experiment["call"] = {
        "function": function,
        "argv": subargv,
    }

    with experiment.time("program"):
        experiment.execute(lambda: run(experiment, subargv))

    experiment.write(out)
def main():
    """Return a greeting built from the `greeting` and `name` options."""
    # The greeting
    greeting: Option = default("Hello")

    # The name to greet
    name: Option = default("you")

    # Result has the form "<greeting>, <name>!".
    return f"{greeting}, {name}!"
def main(exp):
    """Train `Net` on the training split returned by `get_dataset`.

    Uses MSE loss and the Adam optimizer, and logs the loss of every batch
    through the iteration object yielded by `dataloop`.
    """
    # Dataset to use
    dataset: Argument

    # super resolution upscale factor
    upscale_factor: Argument & int = default(2)

    # # testing batch size (default: 10)
    # test_batch_size: Argument & int = default(10)

    # Learning rate (default: 0.1)
    lr: Argument & float = default(0.1)

    # Batch size (default: 64)
    batch_size: Argument & int = default(64)

    torch_settings = init_torch()
    device = torch_settings.device

    print('===> Loading datasets')
    # dataset_instance = exp.resolve_dataset("milabench.presets:bsds500")
    # folder = dataset_instance["environment"]["root"]
    sets = get_dataset(exp, dataset, upscale_factor)
    train_set = sets.train
    # train_set = get_dataset(os.path.join(folder, "bsds500/BSR/BSDS500/data/images/train"), upscale_factor)
    # test_set = get_dataset(os.path.join(folder, "bsds500/BSR/BSDS500/data/images/test"), upscale_factor)
    training_data_loader = DataLoader(dataset=train_set, num_workers=torch_settings.workers, batch_size=batch_size, shuffle=True)
    # testing_data_loader = DataLoader(
    #     dataset=test_set,
    #     num_workers=torch_settings.workers,
    #     batch_size=test_batch_size,
    #     shuffle=False
    # )

    print('===> Building model')
    model = Net(upscale_factor=upscale_factor).to(device)
    model.train()

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)
    # presumably `dataloop` decides when iteration stops via `wrapper` -- verify
    for it, (input, target) in dataloop(training_data_loader, wrapper=wrapper):
        it.set_count(batch_size)

        input = input.to(device)
        target = target.to(device)

        # Standard step: reset gradients, forward pass, log, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(input), target)
        it.log(loss=loss.item())
        loss.backward()
        optimizer.step()
def accum():
    """Return the values of the `junk` and `clusters` options as a pair."""
    # [action: append]
    junk: Option = default([])

    # [action: append]
    # [nargs: +]
    clusters: Option = default([])

    # NOTE(review): both defaults are list literals; whether a fresh list is
    # created per call depends on how `default` is evaluated -- confirm.
    return junk, clusters
def run():
    """Scrape websites for information."""
    # File containing the sites to scrape and the scraping rules
    # [aliases: -s]
    sites: Argument & ConfigFile = default({})

    # Output file or directory to save the results
    # [aliases: -o]
    out: Argument = default(None)

    # Format to output the results in
    # [aliases: -f]
    format: Argument = default("old")

    # Site to generate for
    site: Argument = default(None)

    # One timestamp is shared by every entry produced in this run.
    now = str(datetime.now())
    results = []

    for sitename, sitecfg in sites.read().items():
        # When a single site was requested, skip all the others.
        if site and site != sitename:
            continue

        for index, url in enumerate(sitecfg["urls"]):
            # A URL entry is either a plain value or a dict carrying its own
            # "info" overrides next to the actual "url".
            urlinfo = {}
            if isinstance(url, dict):
                urlinfo = dict(url["info"])
                url = url["url"]

            # Default key is the position of the URL, prefixed by the site.
            if "urlkey" not in urlinfo:
                urlinfo["urlkey"] = str(index)
            urlinfo["urlkey"] = f"{sitename}-{urlinfo['urlkey']}"

            info = {**sitecfg["info"], **urlinfo, "time": now, "url": url}
            results.extend(extract_sections(url, info, sitecfg))

    if format == "new":
        # New format: every entry goes into a single JSON file.
        page_to_json(results, out or "scrape_results.json")
    elif format == "old":
        # Old format: one JSON file per urlkey, entries indexed by title.
        outdir = out or "covidfaq/scrape"
        files = defaultdict(dict)
        for entry in results:
            page = files[entry.pop("urlkey")]
            page["document_URL"] = entry["url"]
            page[entry["title"]] = entry

        os.makedirs(outdir, exist_ok=True)
        for urlkey, data in files.items():
            page_to_json(data, os.path.join(outdir, urlkey + ".json"))
    else:
        print(f"Unknown format: {format}")
        sys.exit(1)
def search_ext():
    """Call `search` with the collection and researchers options."""
    # File containing the collection
    # [alias: -c]
    collection: Option & PapersFile = default(None)

    # Researchers file (JSON)
    # [alias: -r]
    researchers: Option & ResearchersFile = default(None)

    # Both options default to None and are passed through unchanged.
    return search(collection=collection, researchers=researchers)
def patriotism():
    """Return "wave" repeated `times` times, or "don't wave" if `flag` is off."""
    # Whether to wave the flag or not
    # [false-options]
    # [aliases: -f --yay]
    flag: Option & bool = default(True)

    # [options: -n]
    times: Option & int = default(1)

    return ("wave" * times) if flag else "don't wave"
def iteration_wrapper(
    experiment,
    sync=None,
    # Maximum count before stopping
    max_count: Argument & int = default(1000),
    # Number of seconds for sampling items/second
    sample_duration: Argument & float = default(0.5),
):
    """Create the "train" rate chrono on `experiment.chronos`.

    `sync`, `sample_duration` and `max_count` are forwarded unchanged to
    `experiment.chronos.create`, whose result is returned.
    """
    return experiment.chronos.create(
        "train",
        type="rate",
        sync=sync,
        sample_duration=sample_duration,
        max_count=max_count,
    )
def boo():
    """Return the `jeer`, `good` and `potato` option values as a triple."""
    # [negate: --clap]
    # No jeering
    jeer: Option & bool = default(True)

    # [negate]
    # Lack of goodness
    good: Option & bool = default(True)

    # Potato!
    # [false-options: --famine]
    # [false-options-doc: No potato]
    potato: Option & bool = default(None)

    # NOTE(review): the bracketed tags above look like directives for the CLI
    # framework (negated / false-valued flags) -- keep them with their option.
    return jeer, good, potato
def command_dataset(subargv):
    """Download a dataset.

    Positional argument must point to a Python function in a module,
    using the entry point syntax.
    """
    # Name(s) of the dataset(s) to download (e.g. milarun.datasets:mnist)
    # [positional: +]
    name: Argument & resolve

    # Root directory for datasets (default: $MILARUN_DATAROOT)
    # [metavar: PATH]
    # [alias: -d]
    dataroot: Argument = default(os.getenv("MILARUN_DATAROOT"))
    if not dataroot:
        # One string instead of two print arguments: the first fragment
        # already ends in ", ", so print's separator doubled the space.
        print(
            "milarun: error: no dataroot specified, "
            "please use --dataroot/-d or set $MILARUN_DATAROOT",
            file=sys.stderr,
        )
        sys.exit(1)
    dataroot = os.path.realpath(os.path.expanduser(dataroot))

    # Each entry of `name` is called with the data root and must return an
    # object exposing `download()`.
    for dataset_gen in name:
        dataset = dataset_gen(dataroot)
        dataset.download()
def main(): """Guessing game""" # Number of rounds of guessing rounds: Option & int = default(1) for i in range(rounds): guess()
def config_add():
    """Add a new remote or edit an existing one."""
    # Name of the remote
    # [positional]
    name: Option

    # URL of the remote
    # [positional]
    url: Option

    # Port to connect to
    # [alias: -p]
    port: Option = default(22)

    config_file = "remotes.json"
    cfg = get_config(config_file)

    if "@" not in url:
        # No "@" in the URL: treat it as a path on this machine and map
        # $HOME onto its resolved location.
        local_path = os.path.realpath(os.path.expanduser(url))
        entry = {
            "type": "local",
            "url": "localhost",
            "port": None,
            "paths": {os.getenv("HOME"): local_path},
        }
    else:
        entry = _cfg_from_url(url, port)

    # An existing remote of the same name is overwritten.
    cfg[name] = entry
    write_config(config_file, cfg)
def command_config():
    """Configure paperoni."""
    # Keep a copy of the starting configuration so that the file is only
    # rewritten when something actually changed.
    cfg = get_config() or {}
    orig_cfg = dict(cfg)

    key: Option & str = default(None)
    if key is None:
        print(
            T.bold("Note:"),
            "You need a Microsoft Academic Search API key in order to use this program.",
            "Free tier API keys will afford you 5000 to 10000 queries",
            "per month which is more than enough for personal use.",
            "You can get one by subscribing here:",
        )
        print()
        print(
            " https://msr-apis.portal.azure-api.net/products/project-academic-knowledge"
        )
        print()
        print("Once you have an API key, paste it below:")
        print()
        # The stored key is shown as the default; an empty answer keeps it.
        key = get_config("key")
        key = input(T.cyan(f"Enter MS Academic API key [{key}]: ")) or key

    cfg["key"] = key

    if cfg != orig_cfg:
        write_config(cfg)
def main(exp):
    """Iterate over the training data loader, moving each batch to the device.

    No model is involved: every item of every batch is transferred with
    `.to(torch_settings.device)` and the iteration is logged through the
    object yielded by `dataloop`.
    """
    # Number of examples per batch
    batch_size: Argument & int = default(256)

    # Dataset to load
    dataset: Argument

    torch_settings = init_torch()
    dataset = exp.get_dataset(dataset)

    loader = torch.utils.data.DataLoader(
        dataset.train,
        batch_size=batch_size,
        shuffle=True,
        num_workers=torch_settings.workers,
        pin_memory=True
    )

    wrapper = iteration_wrapper(exp, sync=None)

    # Warm up a bit
    # NOTE(review): the source was flattened onto one line, so the nesting of
    # `break` is reconstructed -- confirm whether it belongs to the inner loop
    # (as written here) or to the outer one.
    for _, batch in zip(range(10), loader):
        for item in batch:
            item.to(torch_settings.device)
            break

    for it, batch in dataloop(loader, wrapper=wrapper):
        it.set_count(batch_size)
        it.log(eta=True)
        batch = [item.to(torch_settings.device) for item in batch]

        # `sync` is None on CPU, so it is only called when set.
        if torch_settings.sync:
            torch_settings.sync()
def command_collect():
    """Collect papers from the Microsoft Academic database."""
    # File containing the collection
    # [alias: -c]
    collection: Arg & PapersFile

    # Researchers file (JSON)
    # [alias: -r]
    researchers: Arg & ResearchersFile = default(None)

    # Command to run on every paper
    command: Arg = default(None)

    # Prompt for papers even if they were excluded from the collection
    show_excluded: Arg & bool = default(False)

    # Include all papers from the collection
    # [options: --yes]
    yes_: Arg & bool = default(False)
    if yes_:
        command = "y"

    # Exclude all papers from the collection
    # [options: --no]
    no_: Arg & bool = default(False)
    if no_:
        command = "n"

    for paper in search(researchers=researchers):
        # Skip papers already collected, and excluded ones unless
        # --show-excluded was given. Short-circuiting keeps `excludes` from
        # being called for papers already in the collection.
        if paper in collection or (
            not show_excluded and collection.excludes(paper)
        ):
            continue

        instruction = search_commands.process_paper(
            paper,
            collection=collection,
            command=command,
        )
        # Only an explicit False stops the loop.
        if instruction is False:
            break

    collection.save()
def main(exp):
    """Train `Sequence` on generated wave data with SGD and MSE loss.

    The loss of every optimizer step is logged through the iteration object
    yielded by `dataloop`.
    """
    # Model float type
    dtype: Argument & str = default("float32")

    # Number of samples
    samples: Argument & int = default(100)

    torch_settings = init_torch()
    device = torch_settings.device

    data = generate_wave_data(20, 1000, samples)
    _dtype = to_type[dtype]

    # Rows 3 and up are used for training, the first three for the test
    # tensors; targets are the inputs shifted by one column.
    input = torch.from_numpy(data[3:, :-1]).to(device=device, dtype=_dtype)
    target = torch.from_numpy(data[3:, 1:]).to(device=device, dtype=_dtype)
    # NOTE(review): test_input, test_target and total_time are assigned but
    # not referenced again in this function.
    test_input = torch.from_numpy(data[:3, :-1]).to(device=device, dtype=_dtype)
    test_target = torch.from_numpy(data[:3, 1:]).to(device=device, dtype=_dtype)

    # build the model
    seq = Sequence().to(device=device, dtype=_dtype)
    criterion = nn.MSELoss().to(device=device, dtype=_dtype)
    optimizer = optim.SGD(seq.parameters(), lr=0.01)

    total_time = 0

    seq.train()
    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)
    # presumably `count()` is an unbounded counter and `wrapper` decides when
    # to stop -- verify
    for it, _ in dataloop(count(), wrapper=wrapper):
        it.set_count(samples)

        def closure():
            # Closure form of the step: the optimizer calls it to recompute
            # the loss and gradients.
            optimizer.zero_grad()
            out = seq(input.to(device=device, dtype=_dtype))
            loss = criterion(out, target)
            loss.backward()
            it.log(loss=loss.item())
            return loss

        optimizer.step(closure)
def command_researcher():
    """Register ids/statuses for a researcher."""
    # Researchers file (JSON)
    # [alias: -f -r]
    researchers: Option & ResearchersFile

    # Name of the researcher
    # [alias: -a]
    # [nargs: +]
    author: Option & str
    author = " ".join(author)

    # Find IDs for the researcher
    find_ids: Option & bool = default(False)

    data = researchers.get(author)
    # Snapshot used later to detect whether the interactive session changed
    # anything.
    original = deepcopy(data.data)

    # Non-interactive mode: look up ids, save, done.
    if find_ids:
        _find_ids(data)
        researchers.save()
        return

    # Interactive mode: menu choice -> handler. Any other answer quits.
    actions = {
        "1": _find_ids,
        "2": _set_property,
        "3": _add_role,
    }
    while True:
        print(T.bold("Current data about"), T.bold_cyan(author))
        print(json.dumps(data.data, indent=2))
        print()
        print(T.bold("What do you want to do?"))
        print(T.bold_yellow("(1)"), "Find ids")
        print(T.bold_yellow("(2)"), "Set a property")
        print(T.bold_yellow("(3)"), "Add a role")
        print(T.bold_yellow("(*)"), "Quit (any other key)")

        task = input(T.bold("> "))
        action = actions.get(task)
        if action is None:
            break
        print()
        action(data)

    if data.data != original:
        save = input(T.bold("Save changes? [y]/n ")).strip()
        # An empty answer counts as "yes".
        if save in ("y", ""):
            researchers.save()
def config_ignore():
    """Add/remove global ignore patterns."""
    # Patterns to ignore
    # [positional: *]
    patterns: Option = default([])

    # List the ignores
    # [alias: -l]
    list: Option & bool = default(False)

    # Remove the ignores
    # [alias: -r]
    # [nargs: *]
    remove: Option = default([])

    ign = get_config_path("ignore")

    if list:
        # Close the file deterministically instead of leaking the handle.
        with open(ign) as ignore_file:
            print(ignore_file.read(), end="")
        sys.exit(0)

    # Fall back to the editor only when there is nothing to add AND nothing
    # to remove; previously a removal-only invocation opened the editor and
    # never removed anything.
    if not patterns and not remove:
        edit_config("ignore")
        sys.exit(0)

    lines = readlines(ign)

    # Show a diff-like listing while building the surviving entries.
    new_lines = []
    for line in lines:
        if line in remove:
            print(f"-{line}")
        else:
            print(f" {line}")
            new_lines.append(line)

    # A pattern is not added if it already exists or is being removed in
    # this same invocation.
    no_add = {*lines, *remove}
    for pattern in patterns:
        if pattern not in no_add:
            print(f"+{pattern}")
            new_lines.append(pattern)

    # Write the filtered list; the original wrote `lines`, which still held
    # the removed entries.
    writelines(ign, new_lines)
def config_path():
    """Edit path mappings for a remote."""
    # Name of the remote
    # [positional]
    name: Option

    # Source path
    # [positional: ?]
    source: Option

    # Destination path
    # [positional: ?]
    dest: Option

    # List path mappings
    # [alias: -l]
    list: Option & bool = default(False)

    # Whether to remove the path
    # [alias: -r]
    remove: Option = default(None)

    cfg = get_config("remotes.json")
    remote = _check_remote(cfg, name, msg="Nothing to do")

    # Listing takes precedence over every other action.
    if list:
        for s, d in _sort_paths(remote):
            print(f"{s:30}:{d}")
        return

    # Removal: drop one mapping and persist.
    if remove is not None:
        # presumably `q` aborts the program with the message -- verify
        if remove not in remote["paths"]:
            q(f"Source path '{remove}' is not mapped")
        del remote["paths"][remove]
        write_config("remotes.json", cfg)
        return

    # Otherwise add or overwrite the mapping source -> dest.
    if source is None:
        q("SOURCE must be specified")
    if dest is None:
        q("DEST must be specified")
    remote["paths"][source] = dest
    write_config("remotes.json", cfg)
def stout(v):
    """Call `lager` on `v` with `w` and with `q`; return both results."""
    # Double you,
    # Double me
    w: Option & int = default(1)

    # This is your cue
    # [metavar: CUE]
    q: Option & int = 2

    # NOTE(review): `q` is assigned a plain `2` rather than `default(2)`,
    # unlike `w` -- confirm this is intentional.
    a = lager(v, w)
    b = lager(v, q)
    return a, b
def command_collect():
    """Collect papers from the Microsoft Academic database."""
    # File containing the collection
    # [alias: -c]
    collection: Option & PapersFile

    # Researchers file (JSON)
    # [alias: -r]
    researchers: Option & ResearchersFile = default(None)

    # Command to run on every paper
    command: Option = default(None)

    # Prompt for papers even if they were excluded from the collection
    show_excluded: Option & bool = default(False)

    # Display long form for each paper
    long: Option & bool = default(False)

    # Update existing papers with new information
    update: Option & bool = default(False)

    # Include all papers from the collection
    # [options: --yes]
    yes_: Option & bool = default(False)
    if yes_:
        command = "y"

    # Exclude all papers from the collection
    # [options: --no]
    no_: Option & bool = default(False)
    if no_:
        command = "n"

    for paper in search(researchers=researchers):
        if paper in collection:
            # Known paper: never prompt again, but refresh it on request.
            if update:
                collection.add(paper)
        elif show_excluded or not collection.excludes(paper):
            # New paper that is not excluded (or exclusions are overridden).
            instruction = search_commands.process_paper(
                paper,
                collection=collection,
                command=command,
                formatter=Paper.format_term_long if long else Paper.format_term,
            )
            # Only an explicit False stops the loop.
            if instruction is False:
                break

    collection.save()
def main(exp):
    """Bundle the CLI arguments and torch settings and run the training.

    The arguments, the result of `init_torch` and the iteration wrapper are
    packed into an `NS` and handed to `train300_mlperf_coco`.
    """
    # dataset to use
    dataset: Argument

    # batch size
    batch_size: Argument & int = default(32)

    # path to model checkpoint file
    checkpoint: Argument = default(None)

    torch_settings = init_torch()
    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    args = NS(
        dataset=dataset,
        checkpoint=checkpoint,
        batch_size=batch_size,
        torch_settings=torch_settings,
        wrapper=wrapper,
    )
    train300_mlperf_coco(exp, args)
def command_summary(subargv):
    """Output a JSON summary of the results of the jobs command."""
    # Directory in which the reports are
    # [positional]
    reports: Argument

    # JSON file where to output the summary
    # [alias: -o]
    out: Argument = default(None)

    reports = os.path.realpath(os.path.expanduser(reports))
    out = out and os.path.realpath(os.path.expanduser(out))

    results = summarize(reports, filter=_filter, group=_group)

    if out:
        # The context manager flushes and closes the file; the original
        # passed a bare open() to json.dump and never closed it.
        with open(out, "w") as outfile:
            json.dump(results, outfile, indent=4)
    else:
        # No output file: print the summary to stdout instead.
        print(json.dumps(results, indent=4))
def command_rerun(subargv):
    """Re-run a benchmark, using the JSON output of a previous run."""
    # JSON results file
    # [positional]
    job: Argument & config

    # File/directory where to put the results. Assumed to be a directory
    # unless the name ends in .json
    # [alias: -o]
    out: Argument = default(None)

    call_info = job["call"]

    cmd = ["milarun", "run"]
    if out:
        cmd.extend(("--out", out))

    # Fields of the previous job that map onto `milarun run` flags; a field
    # whose value is None is omitted.
    for field, flag in (("dataroot", "--dataroot"), ("name", "--experiment-name")):
        value = job[field]
        if value is not None:
            cmd.extend((flag, value))

    # Original entry point and arguments, followed by any extra arguments
    # given to this command.
    cmd.append(call_info["function"])
    cmd.append("--")
    cmd.extend(call_info["argv"])
    cmd.extend(subargv)

    print("=== re-run command ===")
    environ = job["environ"]
    for k, v in environ.items():
        print(f"{k} = {v}")
    print(" ".join(cmd))
    print("======================")

    # The recorded variables override the current environment.
    subprocess.run(cmd, env={**os.environ, **environ})
def command_report(subargv):
    """Output a report from the results of the jobs command."""
    # Directory in which the reports are
    # [positional]
    reports: Argument

    # Comparison file
    compare: Argument & config = default(None)

    # Weights file to compute the score
    weights: Argument & config = default(None)

    # Path to the HTML file to generate
    html: Argument = default(None)

    # Compare the configuration's individual GPUs
    compare_gpus: Argument & bool = default(False)

    # Price of the configuration, to compute score/price ratio
    price: Argument & float = default(None)

    # Title to give to the report
    title: Argument = default(None)

    reports = os.path.realpath(os.path.expanduser(reports))

    if os.path.isdir(reports):
        # A directory of reports is summarized on the fly.
        results = summarize(reports, filter=_filter, group=_group)
    else:
        # Anything else is loaded as a JSON file. The context manager closes
        # the handle, which the original bare open() never did.
        with open(reports) as report_file:
            results = json.load(report_file)

    make_report(
        results,
        compare=compare,
        weights=weights,
        html=html,
        compare_gpus=compare_gpus,
        price=price,
        title=title,
    )