def test_evaluate_file(self):
    json_str = _jsonnet.evaluate_file(
        self.input_filename,
        import_callback=import_callback,
        native_callbacks=native_callbacks,
    )
    self.assertEqual(json_str, self.expected_str)
def config_load(filename, ext_vars):
    try:
        text = _jsonnet.evaluate_file(
            filename,
            max_trace=100,
            ext_vars=ext_vars,
            import_callback=jsonnet_import_callback)
    except RuntimeError as e:
        # Error from Jsonnet
        sys.stderr.write(str(e))
        sys.stderr.write('\n')
        sys.exit(1)
    config = json.loads(text)
    try:
        config_check(config)
    except validate.ConfigError as e:
        if debug:
            traceback.print_exc()
        else:
            sys.stderr.write('Config error: %s\n' % e)
            if e.note:
                sys.stderr.write('%s\n' % e.note)
        sys.exit(1)
    return config
def from_file(params_file: str, params_overrides: str = "", ext_vars: dict = None) -> 'Params':
    """
    Load a `Params` object from a configuration file.

    Parameters
    ----------
    params_file : ``str``
        The path to the configuration file to load.
    params_overrides : ``str``, optional
        A dict of overrides that can be applied to final object.
        e.g. {"model.embedding_dim": 10}
    ext_vars : ``dict``, optional
        Our config files are Jsonnet, which allows specifying external variables
        for later substitution. Typically we substitute these using environment
        variables; however, you can also specify them here, in which case they
        take priority over environment variables.
        e.g. {"HOME_DIR": "/Users/allennlp/home"}
    """
    if ext_vars is None:
        ext_vars = {}

    # redirect to cache, if necessary
    params_file = cached_path(params_file)
    ext_vars = {**dict(os.environ), **ext_vars}

    file_dict = json.loads(evaluate_file(params_file, ext_vars=ext_vars))

    overrides_dict = parse_overrides(params_overrides)
    param_dict = with_fallback(preferred=overrides_dict, fallback=file_dict)

    return Params(param_dict)
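# Usage sketch for the loader above (hypothetical paths and values; assumes the
# AllenNLP ``Params`` class, where ``from_file`` is exposed as a static method).
# Overrides win over the file, and ``ext_vars`` wins over ``os.environ``.
#
# from allennlp.common.params import Params
#
# params = Params.from_file(
#     "experiments/model.jsonnet",
#     params_overrides='{"model.embedding_dim": 10}',
#     ext_vars={"HOME_DIR": "/tmp/allennlp"},
# )
# embedding_dim = params["model"]["embedding_dim"]  # -> 10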
def ExpandFile(self, file_name):
    # Jsonnet interpreter, import only if needed to avoid dependency.
    try:
        import _jsonnet
    except ImportError:
        raise JsonnetNotFoundError(
            'Module "_jsonnet" missing; Is _jsonnet.so in your $PYTHONPATH?')
    project = self.__kwargs['project']
    json_str = _jsonnet.evaluate_file(file_name, env={'GCP_PROJECT': project})
    json_data = json.loads(json_str)
    return json_data['resources']
def config_load(filename):
    try:
        text = _jsonnet.evaluate_file(filename, max_trace=100)
    except RuntimeError as e:
        # Error from Jsonnet
        sys.stderr.write(str(e))
        sys.stderr.write('\n')
        sys.exit(1)
    config = json.loads(text)
    try:
        config_check(config)
    except ConfigError as e:
        sys.stderr.write(str(e))
        sys.stderr.write('\n')
        sys.exit(1)
    return config
from __future__ import absolute_import, division, print_function

import json
import sys

import _jsonnet

from kpm.render_jsonnet import RenderJsonnet
from kpm.template_filters import jsonnet_callbacks

# r = RenderJsonnet()
# result = r.render_jsonnet(open(sys.argv[1]).read())


def native_bool(b):
    return ['true', True, False, 1, 0]


json_str = _jsonnet.evaluate_file(
    sys.argv[1],
    native_callbacks={"nativeBool": (("bool",), native_bool)},
)
sys.stdout.write(json_str)
# sys.stdout.write(json.dumps(result))
def main():
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    torch.set_num_threads(1)

    if args.config_args:
        config = json.loads(
            _jsonnet.evaluate_file(args.config, tla_codes={'args': args.config_args}))
    else:
        config = json.loads(_jsonnet.evaluate_file(args.config))

    if 'model_name' in config:
        args.logdir = os.path.join(args.logdir, config['model_name'])

    output_path = args.output.replace('__LOGDIR__', args.logdir)
    if os.path.exists(output_path):
        print('Output file {} already exists'.format(output_path))
        sys.exit(1)

    # 0. Construct preprocessors
    model_preproc = registry.instantiate(
        registry.lookup('model', config['model']).Preproc,
        config['model'])
    model_preproc.load()

    # 1. Construct model
    model = registry.construct('model', config['model'],
                               preproc=model_preproc, device=device)
    model.to(device)
    model.eval()
    model.visualize_flag = False

    optimizer = registry.construct('optimizer', config['optimizer'],
                                   params=model.parameters())

    # 2. Restore its parameters
    saver = saver_mod.Saver(model, optimizer)
    last_step = saver.restore(args.logdir, step=args.step, map_location=device)
    if not last_step:
        raise Exception('Attempting to infer on untrained model')

    # 3. Get training data somewhere
    output = open(output_path, 'w')
    data = registry.construct('dataset', config['data'][args.section])
    if args.limit:
        sliced_data = itertools.islice(data, args.limit)
    else:
        sliced_data = data

    with torch.no_grad():
        if args.mode == 'infer':
            orig_data = registry.construct('dataset', config['data'][args.section])
            preproc_data = model_preproc.dataset(args.section)
            if args.limit:
                # Slice the original and preprocessed datasets, not `data`.
                sliced_orig_data = itertools.islice(orig_data, args.limit)
                sliced_preproc_data = itertools.islice(preproc_data, args.limit)
            else:
                sliced_orig_data = orig_data
                sliced_preproc_data = preproc_data
            assert len(orig_data) == len(preproc_data)
            infer(model, args.beam_size, args.output_history,
                  sliced_orig_data, sliced_preproc_data, output)
        elif args.mode == 'debug':
            data = model_preproc.dataset(args.section)
            if args.limit:
                sliced_data = itertools.islice(data, args.limit)
            else:
                sliced_data = data
            debug(model, sliced_data, output)
        elif args.mode == 'visualize_attention':
            model.visualize_flag = True
            model.decoder.visualize_flag = True
            data = registry.construct('dataset', config['data'][args.section])
            if args.limit:
                sliced_data = itertools.islice(data, args.limit)
            else:
                sliced_data = data
            visualize_attention(model, args.beam_size, args.output_history,
                                sliced_data, output)
import os
import sys
import json

from _jsonnet import evaluate_file

# Jsonnet file to evaluate
params_file = sys.argv[1]
# Filepath of output file
outfile = sys.argv[2]

# Environment variables
ext_vars = dict(os.environ)

file_dict = json.loads(evaluate_file(params_file, ext_vars=ext_vars))

with open(outfile, "w") as handle:
    json.dump(file_dict, handle, indent=4)
import random
import subprocess
import sys
import torch
import logging
import json
import _jsonnet
import pathlib
from collections import defaultdict
from sklearn.metrics import precision_recall_fscore_support

MODEL_CONFIG = sys.argv[1]
MODELS_DIR = pathlib.Path(sys.argv[2])
SEMI_SUPERVISED_UNLABELLED_FILE = sys.argv[3]
SEED = sys.argv[4]
VAL_DATA_PATH = pathlib.Path(
    json.loads(_jsonnet.evaluate_file(MODEL_CONFIG))['validation_data_path'])

random.seed(SEED)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Generate the seeds for all the models in this tri-training run.
# We opt to use the same seed to control for initialization, so only the
# tri-training dataset can affect the performance in a given run.
PYTORCH_SEED = random.randint(0, 10000)
NUMPY_SEED = random.randint(0, 10000)
RANDOM_SEED = random.randint(0, 10000)

logger = logging.getLogger(__file__)
logger.info('START')
            variable_mention_to_variable_id=variable_mention_to_variable_id,
            variable_mention_mask=variable_mention_mask,
            variable_mention_num=variable_mention_num,
            variable_encoding_mask=variable_encoding_mask,
            target_type_src_mems=target_type_src_mems,
            src_type_id=src_type_id,
            target_mask=target_mask,
            target_submask=target_subtype_id > 0,
            target_type_sizes=target_type_sizes,
        ),
        dict(
            tgt_var_names=sum([e.tgt_var_names for e in examples], []),
            target_type_id=target_type_id,
            target_name_id=target_name_id,
            target_subtype_id=target_subtype_id,
            target_mask=target_mask,
            test_meta=[e.test_meta for e in examples],
        ),
    )


if __name__ == "__main__":
    config = json.loads(_jsonnet.evaluate_file("config.xfmr.jsonnet"))
    dataset = Dataset("data1/dev-*.tar", config["data"])
    dataloader = torch.utils.data.DataLoader(
        dataset, num_workers=8, batch_size=64, collate_fn=Dataset.collate_fn)
    for x in dataloader:
        pass
import sys
import json

import _jsonnet

if __name__ == "__main__":
    config_to_edit_path = sys.argv[1]
    jsonnet_config_path = sys.argv[2]

    with open(config_to_edit_path) as f1:
        config_to_edit = json.load(f1)

    jsonnet_config = json.loads(_jsonnet.evaluate_file(jsonnet_config_path))

    for key in ["random_seed", "numpy_seed", "pytorch_seed"]:
        config_to_edit[key] = jsonnet_config[key]

    with open(config_to_edit_path, "w") as f1:
        json.dump(config_to_edit, f1, indent=4)
def parse_parameter_json(parameters_path: str):
    parameters = json.loads(_jsonnet.evaluate_file(parameters_path))
    keys = parameters.keys()
    values = [parameters[k] for k in keys]
    for combinations in itertools.product(*values):
        yield dict(zip(keys, combinations))
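# The generator above treats every top-level field of the Jsonnet file as a list of
# candidate values and yields one dict per point of the Cartesian product, i.e. a
# simple grid search. A minimal usage sketch, with a hypothetical grid file written
# to a temp path (assumes the function above and its imports are in scope):
#
# import tempfile
#
# with tempfile.NamedTemporaryFile("w", suffix=".jsonnet", delete=False) as f:
#     f.write("{lr: [1e-3, 1e-4], batch_size: [16, 32]}")
#
# for trial in parse_parameter_json(f.name):
#     print(trial)  # four dicts in total, e.g. {'lr': 0.001, 'batch_size': 16}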
import sys
import json
import _jsonnet as jsonnet
import os.path as _p

from jsonschema2db import JSONSchemaToPostgres

input_file_path = sys.argv[-1]
assert _p.splitext(input_file_path)[-1] in ('.jsonnet', '.json')

json_string = jsonnet.evaluate_file(input_file_path)
schema = json.loads(json_string)


class DummyConnection:
    sql_history = []

    @classmethod
    def get_statements(cls):
        return cls.sql_history

    def cursor(connection):
        class Lifecycle:
            class DummyCursor:
                def execute(self, query, args):
                    connection.sql_history.append((query, args))

            def __enter__(self):
                return self.DummyCursor()

            def __exit__(self, *args):
                pass
def setup(args):
    """
    Create the blackbox function to optimize.
    This is a complex function that wraps the true parameter setting and training
    in subprocess calls to allennlp.
    """
    base_config = json.loads(_jsonnet.evaluate_file(args.base_config_path))
    search_config = json.loads(_jsonnet.evaluate_file(args.search_config_path))
    arg_overrides = parse_overrides(args.overrides)

    # Flatten configs and get shorthand mappings
    flat_base_config = flatten(base_config)
    flat_search_config = flatten(search_config)
    shorthands = get_shorthands(flat_search_config)

    # Extract any variable dimensions and the mapping to their keys
    search_space = extract_search_space(flat_search_config)
    lambdas = extract_lambdas(flat_search_config)
    dimensions = list(search_space.values())

    # We no longer use the base config as an initial point because the base config
    # needs to be minimal -- cannot contain fields which aren't used by certain hp
    # configurations since overrides cannot "delete" a field in the base config.
    x0 = None  # get_x0(flat_base_config, search_space)

    trial_num = 0
    trial_paths = dict()

    # Construct f
    def f(x):
        nonlocal trial_num
        nonlocal trial_paths

        # Map the x to the config keys that need updating
        newx = []
        for d, p in zip(dimensions, x):
            print(d.name, d, p, type(p))
            if 'numpy' in str(type(p)):
                p = p.item()
            newx.append(p)
        x = newx

        overrides = skopt.utils.point_asdict(search_space, x)
        overrides = fill_search_constants(overrides, flat_search_config)
        overrides = restrict_type_overrides(overrides, flat_search_config)
        # print(f'Overrides after fill and restrict: {json.dumps(overrides, indent=2)}')

        # Construct the trial serialization path
        trial_str = construct_trial_name(overrides, shorthands, trial_num)
        trial_path = os.path.join(args.serialization_dir, trial_str)
        trial_paths[trial_num] = trial_path

        # Construct the overrides string
        processed_overrides = format_overrides(overrides, lambdas, base_config, arg_overrides)
        print(f'Sampled config: {json.dumps(processed_overrides, indent=2)}')
        override_str = json.dumps(processed_overrides, indent=None)

        # Run Allennlp train subprocess
        cmd = (f"allennlp train {args.base_config_path} -f -s {trial_path} "
               f"-o '{override_str}' --file-friendly-logging "
               f"--include-package {args.include_package}")
        print(f'CMD: {cmd}')
        try:
            subprocess.check_call(cmd, shell=True)
        except Exception as e:
            logger.error(e, exc_info=True)
            raise e

        trial_num += 1

        # Retrieve the best validation metric and return that value
        metrics = json.load(open(os.path.join(trial_path, 'metrics.json')))
        validation_metric = base_config['trainer']['validation_metric']
        negate = validation_metric.startswith('+')
        validation_metric = validation_metric.lstrip('+-')
        y = metrics[f'best_validation_{validation_metric}']
        if negate:
            y = -y
        return y

    # Construct a callback which maintains only the best weights/archive
    def delete_worse_files_cb(results):
        """
        Remove .th and .gz files for any trials that aren't the best so far.
        """
        nonlocal trial_num
        nonlocal trial_paths
        logger.info(f'DELETE WORSE FILES, trial num:{trial_num}')
        best_trial_num = np.argmin(results.func_vals).item()
        logger.info(f'Func values: {results.func_vals}, best is {best_trial_num} '
                    f'with path {trial_paths[best_trial_num]}')
        for i in range(trial_num):
            if i != best_trial_num:
                logger.info(f'Deleting .th and .gz files at {trial_paths[i]}')
                th_path = os.path.join(trial_paths[i], '*.th')
                gz_path = os.path.join(trial_paths[i], '*.gz')
                cmd = f"rm -f {th_path} && rm -f {gz_path}"
                try:
                    subprocess.check_call(cmd, shell=True)
                except Exception as e:
                    logger.error(e, exc_info=True)
                    raise e

    return f, dimensions, x0, trial_paths, delete_worse_files_cb
import argparse
import os

import psutil
import _jsonnet
from cv2 import cv2
from tqdm import tqdm

from plate_generator import PlateGenerator
from transformations import perspective_transform
from asset_manager import AssetManager
from pascal_voc import bounding_rects_to_xml

project_config_path = 'project_configurations.jsonnet'
project_config = _jsonnet.evaluate_file(project_config_path)
assets = AssetManager(project_config)

parser = argparse.ArgumentParser(description='Reading input arguments.')
parser.add_argument('--num_out_img',
                    default=assets.generator_config['num_out_img'], type=int)
parser.add_argument('--output_directory',
                    default=assets.generator_config['output_directory'], type=str)
parser.add_argument('--img_per_package',
                    default=assets.generator_config['img_per_package'], type=int)
parser.add_argument('--apply_misc_noise',
                    default=assets.generator_config['apply_misc_noise'], type=bool)
parser.add_argument('--apply_dirt',
                    default=assets.generator_config['apply_dirt'], type=bool)
help="End sentence for interpolation") parser.add_argument("--num-steps", type=int, default=8, metavar="N", help="Number of interpolation steps (default: 10)") parser.add_argument("--interpolation-type", type=str, default='lerp', metavar="TYPE", help="Interpolation type") args = parser.parse_args() # Load model model_dir = Path(args.model_path) config = json.loads(evaluate_file(str(model_dir / 'config.jsonnet'))) params = Params(config) with (model_dir / 'TEXT.Field').open("rb") as fp: TEXT: Field = dill.load(fp) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = RecurrentVAE(vocab=TEXT.vocab, params=params.pop('model')) model.load_state_dict(torch.load(model_dir / 'vae.pt')) model.greedy = True model.to(device) model.eval() # Prepare data if args.start_sentence is None or args.end_sentence is None: z_1 = np.random.randn(1, model.latent_dim)
def load_data(config_file):
    config = json.loads(_jsonnet.evaluate_file(config_file))["data"]
    config["max_num_var"] = 1 << 30
    dataset = Dataset(config["test_file"], config)
    return dataset
parser.add_argument("--output-spider", help="Path to save outputs in the Spider format") parser.add_argument("--output-google", help="Path to save output in the Google format") args = parser.parse_args() if args.output_spider is None and args.output_google is None: raise ValueError( "specify output destination in either Google or Michigan format") config_path = find_any_config( args.logdir) if args.config is None else args.config api = DuoratAPI(args.logdir, config_path) data_config = json.loads( _jsonnet.evaluate_file(args.data_config, tla_codes={'prefix': '"data/"'})) if data_config['name'] != 'spider': raise ValueError() del data_config['name'] if args.questions: data_config['paths'] = [args.questions] dataset = SpiderDataset(**data_config) sql_schemas = {} for db_id in dataset.schemas: spider_schema = dataset.schemas[db_id] sql_schemas[db_id] = preprocess_schema_uncached( schema=spider_schema, db_path=dataset.get_db_path(db_id), tokenize=api.preproc._schema_tokenize, )
def load_material_args(material_name: str) -> dict:
    """Load mujoco args related to given material."""
    material_path = os.path.join(MATERIAL_DIR, f"{material_name}.jsonnet")
    return json.loads(_jsonnet.evaluate_file(material_path))
def load_jsonnet(path: str) -> tp.Dict[str, tp.Any]:
    ext_vars = _environment_variables()
    jsondict = json.loads(evaluate_file(str(path), ext_vars=ext_vars))
    return tp.cast(tp.Dict[str, tp.Any], jsondict)
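# Illustrative, self-contained sketch (not from the repository above) of how values
# passed through ext_vars are consumed on the Jsonnet side via std.extVar; it uses
# evaluate_snippet so no file is needed.
#
# import json
# import _jsonnet
#
# snippet = '{ home: std.extVar("HOME"), debug: std.extVar("DEBUG") == "1" }'
# result = json.loads(_jsonnet.evaluate_snippet(
#     "example.jsonnet", snippet, ext_vars={"HOME": "/home/user", "DEBUG": "1"}))
# print(result)  # {'home': '/home/user', 'debug': True}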
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--beam-size', type=int, default=1)
    script_args = parser.parse_args()

    for (bs, lr, end_lr), att in itertools.product(
            ((50, 1e-3, 0),
             # (100, 1e-3, 0),
             # (10, 5e-4, 0),
             # (10, 2.5e-4, 0),
             # (10, 1e-3, 5e-4),
             # (10, 1e-3, 2.5e-4),
             ),
            (0, 1, 2)):
        steps = list(range(1100, 40000, 1000)) + [40000]
        args = "{{bs: {bs}, lr: {lr}, end_lr: {end_lr}, att: {att}}}".format(
            bs=bs,
            lr=lr,
            end_lr=end_lr,
            att=att,
        )
        config = json.loads(
            _jsonnet.evaluate_file(
                'configs/spider-20190205/nl2code-0428-stability.jsonnet',
                tla_codes={'args': args}))
        logdir = os.path.join('logdirs/20190428-stability', config['model_name'])

        for step in steps:
            if not os.path.exists(
                    os.path.join(logdir, 'model_checkpoint-{:08d}'.format(step))):
                continue
            if os.path.exists(
                    os.path.join(
                        logdir,
                        'eval-val-step{:05d}-bs{}.jsonl'.format(
                            step, script_args.beam_size))):
                continue

            infer_command = (
                'python infer.py --config configs/spider-20190205/nl2code-0428-stability.jsonnet '
                '--logdir logdirs/20190428-stability '
                '--config-args "{args}" '
                '--output __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                '--step {step} --section val --beam-size {beam_size}').format(
                    step=step,
                    args=args,
                    beam_size=script_args.beam_size,
                )

            eval_command = (
                'python eval.py --config configs/spider-20190205/nl2code-0428-stability.jsonnet '
                '--logdir logdirs/20190428-stability '
                '--config-args "{args}" '
                '--inferred __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                '--output __LOGDIR__/eval-val-step{step:05d}-bs{beam_size}.jsonl '
                '--section val').format(
                    step=step,
                    args=args,
                    beam_size=script_args.beam_size,
                )

            print('{} && {}'.format(infer_command, eval_command))
def jsonnet_file(file_path, **kwargs):
    """
    Evaluate file_path jsonnet file.
    kwargs are documented in http://jsonnet.org/implementation/bindings.html
    """
    return jsonnet.evaluate_file(file_path, **kwargs)
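# Usage sketch for the thin wrapper above: any keyword accepted by the Python
# bindings is forwarded unchanged. The path and variables are hypothetical.
#
# import json
#
# raw = jsonnet_file(
#     "deployment.jsonnet",
#     ext_vars={"ENV": "staging"},  # forwarded to evaluate_file
#     max_trace=20,
# )
# config = json.loads(raw)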
import sys

import _jsonnet

if len(sys.argv) != 3:
    print('Usage:\npython conf.py [INPUT].jsonnet [OUTPUT].json')
    sys.exit(0)

f_inp = sys.argv[1]
f_out = sys.argv[2]

js = _jsonnet.evaluate_file(f_inp)

with open(f_out, 'w') as fp:
    fp.write(js)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('mode', help="preprocess/train/eval",
                        choices=["preprocess", "train", "eval"])
    parser.add_argument('exp_config_file', help="jsonnet file for experiments")
    parser.add_argument('--use_scheduler',
                        help='whether to use lr_scheduler for training.')
    parser.add_argument('--model_config_args',
                        help="optional overrides for model config args")
    parser.add_argument('--logdir', help="optional override for logdir")
    args = parser.parse_args()

    exp_config = json.loads(_jsonnet.evaluate_file(args.exp_config_file))
    model_config_file = exp_config["model_config"]
    if "model_config_args" in exp_config:
        model_config_args = exp_config["model_config_args"]
        if args.model_config_args is not None:
            model_config_args_json = _jsonnet.evaluate_snippet("", args.model_config_args)
            model_config_args.update(json.loads(model_config_args_json))
        model_config_args = json.dumps(model_config_args)
    elif args.model_config_args is not None:
        model_config_args = _jsonnet.evaluate_snippet("", args.model_config_args)
    else:
        model_config_args = None

    logdir = args.logdir or exp_config["logdir"]
    trainset = exp_config["trainset"]
    valset = exp_config["valset"]

    if args.mode == "preprocess":
        preprocess_config = PreprocessConfig(model_config_file, model_config_args)
        preprocess.main(preprocess_config)
    elif args.mode == "train":
        train_config = TrainConfig(model_config_file, model_config_args,
                                   logdir, trainset, valset)
        if args.use_scheduler is None or args.use_scheduler == 'True':
            train.main(train_config)
        elif args.use_scheduler == 'False':
            train_noscheduler.main(train_config)
        else:
            print("use_scheduler not specified!")
    elif args.mode == "eval":
        for step in exp_config["eval_steps"]:
            infer_output_path = (f"{exp_config['eval_output']}/"
                                 f"{exp_config['eval_name']}-step{step}.infer")
            infer_config = InferConfig(
                model_config_file,
                model_config_args,
                logdir,
                exp_config["eval_section"],
                exp_config["eval_beam_size"],
                infer_output_path,
                step,
                use_heuristic=exp_config["eval_use_heuristic"]
            )
            infer.main(infer_config)

            eval_output_path = (f"{exp_config['eval_output']}/"
                                f"{exp_config['eval_name']}-step{step}.eval")
            eval_config = EvalConfig(
                model_config_file,
                model_config_args,
                logdir,
                exp_config["eval_section"],
                infer_output_path,
                eval_output_path
            )
            eval.main(eval_config)

            res_json = json.load(open(eval_output_path))
            print(step, res_json['total_scores']['all']['exact'])
def train(args):
    work_dir = args['--work-dir']
    config = json.loads(_jsonnet.evaluate_file(args['CONFIG_FILE']))
    config['work_dir'] = work_dir

    if not os.path.exists(work_dir):
        print(f'creating work dir [{work_dir}]', file=sys.stderr)
        os.makedirs(work_dir)

    if args['--extra-config']:
        extra_config = args['--extra-config']
        extra_config = json.loads(extra_config)
        config = util.update(config, extra_config)

    json.dump(config, open(os.path.join(work_dir, 'config.json'), 'w'), indent=2)

    model = RenamingModel.build(config)
    config = model.config
    model.train()

    if args['--cuda']:
        model = model.cuda()

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=0.001)
    nn_util.glorot_init(params)

    # set the padding index for embedding layers to zeros
    # model.encoder.var_node_name_embedding.weight[0].fill_(0.)

    train_set = Dataset(config['data']['train_file'])
    dev_set = Dataset(config['data']['dev_file'])
    batch_size = config['train']['batch_size']

    print(f'Training set size {len(train_set)}, dev set size {len(dev_set)}', file=sys.stderr)

    # training loop
    train_iter = epoch = cum_examples = 0
    log_every = config['train']['log_every']
    evaluate_every_nepoch = config['train']['evaluate_every_nepoch']
    max_epoch = config['train']['max_epoch']
    max_patience = config['train']['patience']
    cum_loss = 0.
    patience = 0.
    t_log = time.time()

    history_accs = []
    while True:
        # load training dataset, which is a collection of ASTs and maps of gold-standard renamings
        train_set_iter = train_set.batch_iterator(
            batch_size=batch_size,
            return_examples=False,
            config=config,
            progress=True,
            train=True,
            num_readers=config['train']['num_readers'],
            num_batchers=config['train']['num_batchers'])
        epoch += 1

        for batch in train_set_iter:
            train_iter += 1
            optimizer.zero_grad()

            # t1 = time.time()
            nn_util.to(batch.tensor_dict, model.device)
            # print(f'[Learner] {time.time() - t1}s took for moving tensors to device', file=sys.stderr)

            # t1 = time.time()
            result = model(batch.tensor_dict, batch.tensor_dict['prediction_target'])
            # print(f'[Learner] batch {train_iter}, {batch.size} examples took {time.time() - t1:4f}s', file=sys.stderr)

            loss = -result['batch_log_prob'].mean()

            cum_loss += loss.item() * batch.size
            cum_examples += batch.size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(params, 5.)

            optimizer.step()
            del loss

            if train_iter % log_every == 0:
                print(f'[Learner] train_iter={train_iter} avg. loss={cum_loss / cum_examples}, '
                      f'{cum_examples} examples ({cum_examples / (time.time() - t_log)} examples/s)',
                      file=sys.stderr)
                cum_loss = cum_examples = 0.
                t_log = time.time()

        print(f'[Learner] Epoch {epoch} finished', file=sys.stderr)

        if epoch % evaluate_every_nepoch == 0:
            print(f'[Learner] Perform evaluation', file=sys.stderr)
            t1 = time.time()
            # ppl = Evaluator.evaluate_ppl(model, dev_set, config, predicate=lambda e: not e['function_body_in_train'])
            eval_results = Evaluator.decode_and_evaluate(model, dev_set, config)
            # print(f'[Learner] Evaluation result ppl={ppl} (took {time.time() - t1}s)', file=sys.stderr)
            print(f'[Learner] Evaluation result {eval_results} (took {time.time() - t1}s)',
                  file=sys.stderr)

            dev_metric = eval_results['func_body_not_in_train_acc']['accuracy']
            # dev_metric = -ppl
            if len(history_accs) == 0 or dev_metric > max(history_accs):
                patience = 0
                model_save_path = os.path.join(work_dir, f'model.bin')
                model.save(model_save_path)
                print(f'[Learner] Saved currently the best model to {model_save_path}',
                      file=sys.stderr)
            else:
                patience += 1
                if patience == max_patience:
                    print(f'[Learner] Reached max patience {max_patience}, exiting...',
                          file=sys.stderr)
                    patience = 0
                    exit()

            history_accs.append(dev_metric)

        if epoch == max_epoch:
            print(f'[Learner] Reached max epoch', file=sys.stderr)
            exit()

        t1 = time.time()
def load_jsonnet(p):
    import _jsonnet
    return json.loads(_jsonnet.evaluate_file(p))
USE_TZ = True

TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/2.0/howto/static-files/
STATIC_URL = '/static/'

# number of bits for the key, all auths should use the same number of bits
KEYBITS = 256

# Versioning
ALLOWED_VERSIONS = ['v1', 'v2']
DEFAULT_VERSION = 'v1'

try:
    from local_settings import *
except ImportError:
    print("local_settings.py not found")

# loading jsonnet config
if os.path.exists("config.jsonnet"):
    import json
    from _jsonnet import evaluate_file

    config = json.loads(evaluate_file("config.jsonnet"))
    for k, v in config.items():
        vars()[k] = v
    INSTALLED_APPS = INSTALLED_APPS + MODULES
import os
import sys

import _jsonnet

if len(sys.argv) != 2:
    raise Exception('Usage: <filename>')


# Returns content if worked, None if file not found, or throws an exception
def try_path(dir, rel):
    if not rel:
        raise RuntimeError('Got invalid filename (empty string).')
    if rel[0] == '/':
        full_path = rel
    else:
        full_path = dir + rel
    if full_path[-1] == '/':
        raise RuntimeError('Attempted to import a directory')
    if not os.path.isfile(full_path):
        return full_path, None
    with open(full_path) as f:
        return full_path, f.read()


def import_callback(dir, rel):
    full_path, content = try_path(dir, rel)
    if content:
        return full_path, content
    raise RuntimeError('File not found')


sys.stdout.write(
    _jsonnet.evaluate_file(sys.argv[1], import_callback=import_callback))
        full_path = dir + rel
    if full_path[-1] == '/':
        raise RuntimeError('Attempted to import a directory')
    if not os.path.isfile(full_path):
        return full_path, None
    with open(full_path) as f:
        return full_path, f.read()


def import_callback(dir, rel):
    full_path, content = try_path(dir, rel)
    if content:
        return full_path, content
    raise RuntimeError('File not found')


# Test native extensions
def concat(a, b):
    return a + b


native_callbacks = {
    'concat': (('a', 'b'), concat),
}

json_str = _jsonnet.evaluate_file(
    sys.argv[1],
    import_callback=import_callback,
    native_callbacks=native_callbacks,
)
sys.stdout.write(json_str)
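# For context, a self-contained sketch (not part of the snippet above) of how a
# native callback registered this way is reached from Jsonnet code via std.native.
#
# import _jsonnet
#
# def concat(a, b):
#     return a + b
#
# json_str = _jsonnet.evaluate_snippet(
#     "inline",
#     'local concat = std.native("concat"); { greeting: concat("hello, ", "world") }',
#     native_callbacks={"concat": (("a", "b"), concat)},
# )
# print(json_str)  # {"greeting": "hello, world"} (pretty-printed)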
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', required=True)
    parser.add_argument('--config', required=True)
    parser.add_argument('--config-args')
    args = parser.parse_args()

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    if args.config_args:
        config = json.loads(
            _jsonnet.evaluate_file(args.config, tla_codes={'args': args.config_args}))
    else:
        config = json.loads(_jsonnet.evaluate_file(args.config))

    if 'model_name' in config:
        args.logdir = os.path.join(args.logdir, config['model_name'])

    train_config = registry.instantiate(TrainConfig, config['train'])

    reopen_to_flush = config.get('log', {}).get('reopen_to_flush')
    logger = Logger(os.path.join(args.logdir, 'log.txt'), reopen_to_flush)
    with open(os.path.join(
            args.logdir,
            'config-{}.json'.format(
                datetime.datetime.now().strftime('%Y%m%dT%H%M%S%Z'))), 'w') as f:
        json.dump(config, f, sort_keys=True, indent=4)
    logger.log('Logging to {}'.format(args.logdir))

    init_random = random_state.RandomContext(train_config.init_seed)
    data_random = random_state.RandomContext(train_config.data_seed)
    model_random = random_state.RandomContext(train_config.model_seed)

    with init_random:
        # 0. Construct preprocessors
        model_preproc = registry.instantiate(
            registry.lookup('model', config['model']).Preproc,
            config['model'],
            unused_keys=('name',))
        model_preproc.load()

        # 1. Construct model
        model = registry.construct('model', config['model'],
                                   unused_keys=('encoder_preproc', 'decoder_preproc'),
                                   preproc=model_preproc, device=device)
        model.to(device)

        optimizer = registry.construct('optimizer', config['optimizer'],
                                       params=model.parameters())
        lr_scheduler = registry.construct(
            'lr_scheduler',
            config.get('lr_scheduler', {'name': 'noop'}),
            optimizer=optimizer)

    # 2. Restore its parameters
    saver = saver_mod.Saver(
        model, optimizer, keep_every_n=train_config.keep_every_n)
    last_step = saver.restore(args.logdir)

    # 3. Get training data somewhere
    with data_random:
        train_data = model_preproc.dataset('train')
        train_data_loader = yield_batches_from_epochs(
            torch.utils.data.DataLoader(
                train_data,
                batch_size=train_config.batch_size,
                shuffle=True,
                drop_last=True,
                collate_fn=lambda x: x))
    train_eval_data_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=train_config.eval_batch_size,
        collate_fn=lambda x: x)

    val_data = model_preproc.dataset('val')
    val_data_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=train_config.eval_batch_size,
        collate_fn=lambda x: x)

    # 4. Start training loop
    with data_random:
        for batch in train_data_loader:
            # Quit if too long
            if last_step >= train_config.max_steps:
                break

            # Evaluate model
            if last_step % train_config.eval_every_n == 0:
                if train_config.eval_on_train:
                    eval_model(logger, model, last_step, train_eval_data_loader, 'train',
                               num_eval_items=train_config.num_eval_items)
                if train_config.eval_on_val:
                    eval_model(logger, model, last_step, val_data_loader, 'val',
                               num_eval_items=train_config.num_eval_items)

            # Compute and apply gradient
            with model_random:
                optimizer.zero_grad()
                loss = model.compute_loss(batch)
                loss.backward()
                lr_scheduler.update_lr(last_step)
                optimizer.step()

            # Report metrics
            if last_step % train_config.report_every_n == 0:
                logger.log('Step {}: loss={:.4f}'.format(last_step, loss.item()))

            last_step += 1
            # Run saver
            if last_step % train_config.save_every_n == 0:
                saver.save(args.logdir, last_step)
            mtest.ids_lengths: ids_lengths,
            mtest.labels: labels,
        }))
    preds = preds[:batch_size_original]
    return preds


parser = argparse.ArgumentParser()
parser.add_argument('--dataset', required=True)
parser.add_argument('--load-model')
parser.add_argument('--config')
parser.add_argument('--input')
args = parser.parse_args()

input_str = args.input

data = pickle.load(open(args.dataset))
word_ids = data['word_ids']
labelers = data['labelers']

pretty_config_str = _jsonnet.evaluate_file(args.config)
config = json.loads(pretty_config_str)

ifttt_train = IftttTrain(args, config)
with tf.variable_scope('model', reuse=None, initializer=None):
    m = ifttt_train.create_model(is_training=False)

saver = tf.train.Saver(max_to_keep=0)
with tf.Session() as sess:
    saver.restore(sess, args.load_model)
    app.run()
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys

import _jsonnet

if len(sys.argv) != 2:
    raise Exception("Usage: <filename>")

sys.stdout.write(_jsonnet.evaluate_file(sys.argv[1]))
import sys
import json

import _jsonnet

from kpm.template_filters import jsonnet_callbacks
from kpm.render_jsonnet import RenderJsonnet

# r = RenderJsonnet()
# result = r.render_jsonnet(open(sys.argv[1]).read())


def native_bool(b):
    return ['true', True, False, 1, 0]


json_str = _jsonnet.evaluate_file(
    sys.argv[1],
    native_callbacks={"nativeBool": (("bool",), native_bool)},
)
sys.stdout.write(json_str)
# sys.stdout.write(json.dumps(result))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True)
    parser.add_argument('--config-args')
    args = parser.parse_args()

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    if args.config_args:
        config = json.loads(
            _jsonnet.evaluate_file(args.config, tla_codes={'args': args.config_args}))
    else:
        config = json.loads(_jsonnet.evaluate_file(args.config))

    # 0. Construct preprocessors
    model_preproc = registry.instantiate(
        registry.lookup('model', config['model']).Preproc,
        config['model'],
        unused_keys=('name',))
    model_preproc.load()

    # 1. Construct model
    model = registry.construct('model', config['model'],
                               unused_keys=('encoder_preproc', 'decoder_preproc'),
                               preproc=model_preproc, device=device)
    model.to(device)
    model.eval()

    # 3. Get training data somewhere
    train_data = model_preproc.dataset('train')
    train_eval_data_loader = torch.utils.data.DataLoader(
        train_data, batch_size=10, collate_fn=lambda x: x)

    batch = next(iter(train_eval_data_loader))
    descs = [x for x, y in batch]

    q0, qb = test_enc_equal([descs[0]['question']],
                            [[desc['question']] for desc in descs],
                            model.encoder.question_encoder)
    c0, cb = test_enc_equal(descs[0]['columns'],
                            [desc['columns'] for desc in descs],
                            model.encoder.column_encoder)
    t0, tb = test_enc_equal(descs[0]['tables'],
                            [desc['tables'] for desc in descs],
                            model.encoder.table_encoder)

    q0_enc, c0_enc, t0_enc = model.encoder.encs_update.forward_unbatched(
        descs[0], q0[0], c0[0], c0[1], t0[0], t0[1])
    qb_enc, cb_enc, tb_enc = model.encoder.encs_update.forward(
        descs, qb[0], cb[0], cb[1], tb[0], tb[1])

    check_close(q0_enc.squeeze(1), qb_enc.select(0))
    check_close(c0_enc.squeeze(1), cb_enc.select(0))
    check_close(t0_enc.squeeze(1), tb_enc.select(0))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--beam-size', type=int, default=1)
    script_args = parser.parse_args()

    for (glove, upd_type, num_layers), att in itertools.product((
            (False, 'full', 4),
            (True, 'no_subtypes', 4),
            (True, 'merge_types', 4),
            (True, 'full', 2),
            (True, 'full', 0),
            ), (0, 1, 2)):
        steps = list(range(1100, 40000, 1000)) + [40000]
        args = '{{glove: {glove}, upd_type: \'{upd_type}\', num_layers: {num_layers}, att: {att}}}'.format(
            glove='true' if glove else 'false',
            upd_type=upd_type,
            num_layers=num_layers,
            att=att)
        config = json.loads(
            _jsonnet.evaluate_file(
                'configs/spider-20190205/nl2code-0521-ablations.jsonnet',
                tla_codes={'args': args}))
        logdir = os.path.join('logdirs/20190521-ablations', config['model_name'])

        for step in steps:
            if not os.path.exists(
                    os.path.join(logdir, 'model_checkpoint-{:08d}'.format(step))):
                continue
            if os.path.exists(
                    os.path.join(
                        logdir,
                        'eval-val-step{:05d}-bs{}.jsonl'.format(
                            step, script_args.beam_size))):
                continue

            infer_command = (
                'python infer.py '
                '--config configs/spider-20190205/nl2code-0521-ablations.jsonnet '
                '--logdir logdirs/20190521-ablations '
                '--config-args "{args}" '
                '--output __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                '--step {step} --section val --beam-size {beam_size}').format(
                    args=args,
                    step=step,
                    beam_size=script_args.beam_size,
                )

            eval_command = (
                'python eval.py --config configs/spider-20190205/nl2code-0521-ablations.jsonnet '
                '--logdir logdirs/20190521-ablations '
                '--config-args "{args}" '
                '--inferred __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                '--output __LOGDIR__/eval-val-step{step:05d}-bs{beam_size}.jsonl '
                '--section val').format(
                    args=args,
                    step=step,
                    beam_size=script_args.beam_size,
                )

            print('{} && {}'.format(infer_command, eval_command))
import os
import sys

import _jsonnet

if len(sys.argv) != 2:
    raise Exception('Usage: <filename>')


# Returns content if worked, None if file not found, or throws an exception
def try_path(dir, rel):
    if not rel:
        raise RuntimeError('Got invalid filename (empty string).')
    if rel[0] == '/':
        full_path = rel
    else:
        full_path = dir + rel
    if full_path[-1] == '/':
        raise RuntimeError('Attempted to import a directory')
    if not os.path.isfile(full_path):
        return full_path, None
    with open(full_path) as f:
        return full_path, f.read()


def import_callback(dir, rel):
    full_path, content = try_path(dir, rel)
    if content:
        return full_path, content
    raise RuntimeError('File not found')


sys.stdout.write(_jsonnet.evaluate_file(sys.argv[1], import_callback=import_callback))
def process_cm_binary_data(name, data, main_jsonnet, ext_libs=[], user_args={}):
    """Process binary_data field from jsonnet configMap.

    Extracts folder, evaluates main_jsonnet file from folder and parses it to
    separate json objects. main_jsonnet should generate all jsons in one json file.

    Args:
        name (str): Config map name.
        data (dict): Binary data from configMap labeled as jsonnet code.
            It should be base64 encoded jsonnet folder (archive).
        main_jsonnet (str): Path in extracted folder to jsonnet file that will be evaluated.
        ext_libs (:obj:`list of str`, optional): List of paths to external jsonnet libs.
        user_args (:obj:`dict`, optional): Keyword arguments to jsonnet build function.

    Returns:
        list of (str, dict): Generated json data.

    Raises:
        JsonnetConfigMapError: Raised if jsonnet evaluation fails or wrong
            archive format is provided.
    """
    tmp_folder_name = f"jsonnet_archive_{name}"
    tmp_file_name = f"generated_from_archive_{name}.json"

    jsons = []
    for dataKey in data.keys():
        filename, extension = os.path.splitext(dataKey)
        if extension not in [
            ".gz", ".tar", ".zip", ".bz2", ".7z", ".tgz", ".rar", ".xz",
        ]:
            log.error(f"Unsupported archive format: {dataKey}")
            raise JsonnetConfigMapError

        archive_data = data[dataKey]
        utils.extract_archive_data(archive_data, dataKey, tmp_folder_name)

        jsonnet_filepath = os.path.join(tmp_folder_name, main_jsonnet)
        try:
            json_ = _jsonnet.evaluate_file(jsonnet_filepath, jpathdir=ext_libs, **user_args)
        except RuntimeError as e:
            log.error(f"{main_jsonnet} is not a valid jsonnet, raised error: {e}")
            utils.remove_folder(tmp_folder_name)
            raise JsonnetConfigMapError
        else:
            utils.save_text_to_file("./", tmp_file_name, json_)
            dashboards = parse_json_with_files(tmp_file_name)
            jsons.extend(dashboards)

            utils.remove_file("./", tmp_file_name)
            utils.remove_folder(tmp_folder_name)

    return jsons
if __name__ == '__main__':
    parser = ArgumentParser(description="Training of Sentence VAE")
    parser.add_argument("--config", type=str, required=True, metavar='PATH',
                        help="Path to a configuration file.")
    parser.add_argument("--hyper-parameters", type=str, metavar='PATH',
                        help="Path to a hyper parameters file.")
    parser.add_argument("--run-dir", type=str, required=True, metavar='PATH',
                        help="Path to a directory where model checkpoints will be stored.")
    parser.add_argument("--force", action='store_true',
                        help="Whether to rewrite data if run directory already exists.")
    parser.add_argument("--experiment-name", type=str, metavar="ID",
                        help="Name of experiment if training process is run under mlflow")
    parser.add_argument("--verbose", action='store_true',
                        help="Verbosity of the training script.")
    args = parser.parse_args()

    if args.experiment_name is not None:
        if args.hyper_parameters is None:
            raise ValueError("You should provide hyper-parameters file to log into mlflow.")
        with open(args.hyper_parameters) as fp:
            h_params = json.load(fp)
        mlflow.set_tracking_uri(args.run_dir)
        mlflow_client = MlflowClient(args.run_dir)
        experiment_id = get_experiment_id(mlflow_client, args.experiment_name)
        tags = get_git_tags(Path.cwd())
        run_experiment(h_params, args.config, mlflow_client, experiment_id,
                       tags=tags, verbose=args.verbose)
    else:
        params = json.loads(evaluate_file(args.config))
        train(args.run_dir, params, args.force, verbose=args.verbose)
def load_env(pattern, core_dir=worldgen_path(), envs_dir='examples', xmls_dir='xmls',
             return_args_remaining=False, **kwargs):
    """
    Flexible load of an environment based on `pattern`.
    Passes args to make_env().
    :param pattern: tries to match environment to the pattern.
    :param core_dir: Absolute path to the core code directory for the project containing
        the environments we want to examine. This is usually the top-level git repository
        folder - in the case of the mujoco-worldgen repo, it would be the
        'mujoco-worldgen' folder.
    :param envs_dir: relative path (from core_dir) to folder containing all environment files.
    :param xmls_dir: relative path (from core_dir) to folder containing all xml files.
    :param return_args_remaining: returns arguments from kwargs that are not used.
    :param kwargs: arguments passed to the environment function.
    :return: mujoco_worldgen.Env
    """
    env = None
    args_remaining = {}
    # Loads environment based on XML.
    if pattern.endswith(".xml"):
        if len(kwargs) > 0:
            print("Not passing any argument to environment, "
                  "because environment is loaded from XML. XML doesn't "
                  "accept any extra input arguments")

        def get_sim(seed):
            model = load_model_from_path_fix_paths(xml_path=pattern)
            return MjSim(model)

        env = Env(get_sim=get_sim)
    # Loads environment based on mjb.
    elif pattern.endswith(".mjb"):
        if len(kwargs) != 0:
            print("Not passing any argument to environment, "
                  "because environment is loaded from MJB. MJB doesn't "
                  "accept any extra input arguments")

        def get_sim(seed):
            model = load_model_from_mjb(pattern)
            return MjSim(model)

        env = Env(get_sim=get_sim)
    # Loads environment from a python file
    elif pattern.endswith("py") and os.path.exists(pattern):
        print("Loading env from the module: %s" % pattern)
        module = run_path(pattern)
        make_env = module["make_env"]
        args_to_pass, args_remaining = extract_matching_arguments(make_env, kwargs)
        env = make_env(**args_to_pass)
    # Loads environment described by a jsonnet file
    elif pattern.endswith(".jsonnet") and os.path.exists(pattern):
        env_data = json.loads(_jsonnet.evaluate_file(pattern))
        make_env = get_function(env_data['make_env'])
        args_to_pass, args_remaining = extract_matching_arguments(make_env, kwargs)
        env = make_env(**args_to_pass)
    else:
        # If couldn't load based on easy search, then look
        # into predefined subdirectories.
        matching = (glob(join(core_dir, envs_dir, "**", "*.py"), recursive=True) +
                    glob(join(core_dir, xmls_dir, "**", "*.xml"), recursive=True))
        matching = [match for match in matching if match.find(pattern) > -1]
        matching = [match for match in matching
                    if not os.path.basename(match).startswith('test_')]
        assert len(matching) < 2, "Found multiple environments matching %s" % str(matching)
        if len(matching) == 1:
            return load_env(matching[0], return_args_remaining=return_args_remaining, **kwargs)
    if return_args_remaining:
        return env, args_remaining
    else:
        return env