Example 1
 def test_evaluate_file(self):
     json_str = _jsonnet.evaluate_file(
         self.input_filename,
         import_callback=import_callback,
         native_callbacks=native_callbacks,
     )
     self.assertEqual(json_str, self.expected_str)
Example 2
def config_load(filename, ext_vars):
    try:
        text = _jsonnet.evaluate_file(
            filename,
            max_trace=100,
            ext_vars=ext_vars,
            import_callback=jsonnet_import_callback)
    except RuntimeError as e:
        # Error from Jsonnet
        sys.stderr.write(str(e))
        sys.stderr.write('\n')
        sys.exit(1)

    config = json.loads(text)
    try:
        config_check(config)
    except validate.ConfigError as e:
        if debug:
            traceback.print_exc()
        else:
            sys.stderr.write('Config error: %s\n' % e)
        if e.note:
            sys.stderr.write('%s\n' % e.note)
        sys.exit(1)
    return config
Example 3
    def from_file(params_file: str, params_overrides: str = "", ext_vars: dict = None) -> 'Params':
        """
        Load a `Params` object from a configuration file.

        Parameters
        ----------
        params_file : ``str``
            The path to the configuration file to load.
        params_overrides : ``str``, optional
            A string specifying a dict of overrides to apply to the final object,
            e.g. {"model.embedding_dim": 10}
        ext_vars : ``dict``, optional
            Our config files are Jsonnet, which allows specifying external variables
            for later substitution. Typically we substitute these using environment
            variables; however, you can also specify them here, in which case they
            take priority over environment variables.
            e.g. {"HOME_DIR": "/Users/allennlp/home"}
        """
        if ext_vars is None:
            ext_vars = {}

        # redirect to cache, if necessary
        params_file = cached_path(params_file)
        ext_vars = {**dict(os.environ), **ext_vars}

        file_dict = json.loads(evaluate_file(params_file, ext_vars=ext_vars))

        overrides_dict = parse_overrides(params_overrides)
        param_dict = with_fallback(preferred=overrides_dict, fallback=file_dict)

        return Params(param_dict)
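For illustration, a minimal usage sketch of the method above; the file name, override string, and HOME_DIR value are hypothetical and not part of the original snippet:

# Hypothetical call to Params.from_file; ext_vars entries take priority
# over environment variables, per the docstring above.
params = Params.from_file(
    "experiment.jsonnet",
    params_overrides='{"model.embedding_dim": 10}',
    ext_vars={"HOME_DIR": "/Users/allennlp/home"},
)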
Example 4
 def ExpandFile(self, file_name):
   # Jsonnet interpreter, import only if needed to avoid dependency.
   try:
     import _jsonnet
   except ImportError:
     raise JsonnetNotFoundError('Module "_jsonnet" missing; is _jsonnet.so in your $PYTHONPATH?')
   project = self.__kwargs['project']
   json_str = _jsonnet.evaluate_file(file_name, env={'GCP_PROJECT': project})
   json_data = json.loads(json_str)
   return json_data['resources']
Example 5
def config_load(filename):
    try:
        text = _jsonnet.evaluate_file(filename, max_trace=100)
    except RuntimeError as e:
        # Error from Jsonnet
        sys.stderr.write(str(e))
        sys.stderr.write('\n')
        sys.exit(1)

    config = json.loads(text)
    try:
        config_check(config)
    except ConfigError as e:
        sys.stderr.write(str(e))
        sys.stderr.write('\n')
        sys.exit(1)
    return config
Example 6
from __future__ import absolute_import, division, print_function

import json
import sys

import _jsonnet

from kpm.render_jsonnet import RenderJsonnet
from kpm.template_filters import jsonnet_callbacks


#r = RenderJsonnet()
#result = r.render_jsonnet(open(sys.argv[1]).read())
def native_bool(b):
    return ['true', True, False, 1, 0]


json_str = _jsonnet.evaluate_file(
    sys.argv[1],
    native_callbacks={"nativeBool": (("bool", ), native_bool)},
)

sys.stdout.write(json_str)
#sys.stdout.write(json.dumps(result))
Example 7
def main():
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
        torch.set_num_threads(1)
    if args.config_args:
        config = json.loads(
            _jsonnet.evaluate_file(args.config,
                                   tla_codes={'args': args.config_args}))
    else:
        config = json.loads(_jsonnet.evaluate_file(args.config))

    if 'model_name' in config:
        args.logdir = os.path.join(args.logdir, config['model_name'])

    output_path = args.output.replace('__LOGDIR__', args.logdir)
    if os.path.exists(output_path):
        print('Output file {} already exists'.format(output_path))
        sys.exit(1)

    # 0. Construct preprocessors
    model_preproc = registry.instantiate(
        registry.lookup('model', config['model']).Preproc, config['model'])
    model_preproc.load()

    # 1. Construct model
    model = registry.construct('model',
                               config['model'],
                               preproc=model_preproc,
                               device=device)
    model.to(device)
    model.eval()
    model.visualize_flag = False

    optimizer = registry.construct('optimizer',
                                   config['optimizer'],
                                   params=model.parameters())

    # 2. Restore its parameters
    saver = saver_mod.Saver(model, optimizer)
    last_step = saver.restore(args.logdir, step=args.step, map_location=device)
    if not last_step:
        raise Exception('Attempting to infer on untrained model')

    # 3. Get training data somewhere
    output = open(output_path, 'w')
    data = registry.construct('dataset', config['data'][args.section])
    if args.limit:
        sliced_data = itertools.islice(data, args.limit)
    else:
        sliced_data = data

    with torch.no_grad():
        if args.mode == 'infer':
            orig_data = registry.construct('dataset',
                                           config['data'][args.section])
            preproc_data = model_preproc.dataset(args.section)
            if args.limit:
                sliced_orig_data = itertools.islice(orig_data, args.limit)
                sliced_preproc_data = itertools.islice(preproc_data, args.limit)
            else:
                sliced_orig_data = orig_data
                sliced_preproc_data = preproc_data
            assert len(orig_data) == len(preproc_data)
            infer(model, args.beam_size, args.output_history, sliced_orig_data,
                  sliced_preproc_data, output)
        elif args.mode == 'debug':
            data = model_preproc.dataset(args.section)
            if args.limit:
                sliced_data = itertools.islice(data, args.limit)
            else:
                sliced_data = data
            debug(model, sliced_data, output)
        elif args.mode == 'visualize_attention':
            model.visualize_flag = True
            model.decoder.visualize_flag = True
            data = registry.construct('dataset', config['data'][args.section])
            if args.limit:
                sliced_data = itertools.islice(data, args.limit)
            else:
                sliced_data = data
            visualize_attention(model, args.beam_size, args.output_history,
                                sliced_data, output)
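The tla_codes pattern above passes the --config-args string to a top-level function in the Jsonnet config. A minimal self-contained sketch of the same mechanism using evaluate_snippet; the snippet and argument values are illustrative, not from the original project:

import json
import _jsonnet

# A stand-in for the kind of config args.config points to: a top-level
# function whose argument is supplied through tla_codes.
demo = "function(args) { batch_size: args.bs, lr: args.lr }"
config = json.loads(_jsonnet.evaluate_snippet(
    "demo.jsonnet", demo, tla_codes={"args": "{bs: 32, lr: 1e-3}"}))
# config == {'batch_size': 32, 'lr': 0.001}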
Example 8
import os
import sys
import json
from _jsonnet import evaluate_file

# Jsonnet file to evaluate
params_file = sys.argv[1]

# Filepath of output file
outfile = sys.argv[2]

# Environment variables
ext_vars = dict(os.environ)

file_dict = json.loads(evaluate_file(params_file, ext_vars=ext_vars))

with open(outfile, "w") as handle:
    json.dump(file_dict, handle, indent=4)
Example 9
import random
import subprocess
import sys
import torch
import logging
import json
import _jsonnet
import pathlib
from collections import defaultdict
from sklearn.metrics import precision_recall_fscore_support

MODEL_CONFIG = sys.argv[1]
MODELS_DIR = pathlib.Path(sys.argv[2])
SEMI_SUPERVISED_UNLABELLED_FILE = sys.argv[3]
SEED = sys.argv[4]
VAL_DATA_PATH = pathlib.Path(
    json.loads(_jsonnet.evaluate_file(MODEL_CONFIG))['validation_data_path'])

random.seed(SEED)

MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Generate the seeds for all the models in this tri-training run.
# We use the same seed to control for initialization, so only the tri-training dataset can affect performance in a given run.
PYTORCH_SEED = random.randint(0, 10000)
NUMPY_SEED = random.randint(0, 10000)
RANDOM_SEED = random.randint(0, 10000)

logger = logging.getLogger(__file__)
logger.info('START')

Example 10
                variable_mention_to_variable_id=variable_mention_to_variable_id,
                variable_mention_mask=variable_mention_mask,
                variable_mention_num=variable_mention_num,
                variable_encoding_mask=variable_encoding_mask,
                target_type_src_mems=target_type_src_mems,
                src_type_id=src_type_id,
                target_mask=target_mask,
                target_submask=target_subtype_id > 0,
                target_type_sizes=target_type_sizes,
            ),
            dict(
                tgt_var_names=sum([e.tgt_var_names for e in examples], []),
                target_type_id=target_type_id,
                target_name_id=target_name_id,
                target_subtype_id=target_subtype_id,
                target_mask=target_mask,
                test_meta=[e.test_meta for e in examples],
            ),
        )


if __name__ == "__main__":
    config = json.loads(_jsonnet.evaluate_file("config.xfmr.jsonnet"))
    dataset = Dataset("data1/dev-*.tar", config["data"])
    dataloader = torch.utils.data.DataLoader(dataset,
                                             num_workers=8,
                                             batch_size=64,
                                             collate_fn=Dataset.collate_fn)
    for x in dataloader:
        pass
Example 11
import sys
import json
import _jsonnet

if __name__ == "__main__":
    config_to_edit_path = sys.argv[1]
    jsonnet_config_path = sys.argv[2]

    with open(config_to_edit_path) as f1:
        config_to_edit = json.load(f1)

    jsonnet_config = json.loads(_jsonnet.evaluate_file(jsonnet_config_path))

    for key in ["random_seed", "numpy_seed", "pytorch_seed"]:
        config_to_edit[key] = jsonnet_config[key]

    with open(config_to_edit_path, "w") as f1:
        json.dump(config_to_edit, f1, indent=4)
Example 12
def parse_parameter_json(parameters_path: str):
    parameters = json.loads(_jsonnet.evaluate_file(parameters_path))
    keys = parameters.keys()
    values = [parameters[k] for k in keys]
    for combinations in itertools.product(*values):
        yield dict(zip(keys, combinations))
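The generator above yields one dict per point in the Cartesian product of the parameter lists. A hedged usage sketch; the file name and keys are hypothetical:

# Assumes search_space.jsonnet maps each key to a list of candidate values,
# e.g. { lr: [1e-3, 1e-4], batch_size: [16, 32] }.
for params in parse_parameter_json("search_space.jsonnet"):
    print(params)  # one dict per combination, e.g. {'lr': 0.001, 'batch_size': 16}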
Example 13
import sys
import json
import _jsonnet as jsonnet
import os.path as _p
from jsonschema2db import JSONSchemaToPostgres

input_file_path = sys.argv[-1]
assert _p.splitext(input_file_path)[-1] in ('.jsonnet', '.json')

json_string = jsonnet.evaluate_file(input_file_path)
schema = json.loads(json_string)


class DummyConnection:
    sql_history = []

    @classmethod
    def get_statements(cls):
        return cls.sql_history

    def cursor(connection):
        class Lifecycle:
            class DummyCursor:
                def execute(self, query, args):
                    connection.sql_history.append((query, args))

            def __enter__(self):
                return self.DummyCursor()

            def __exit__(self, *args):
                pass
Example 14
def setup(args):
    """ Create the blackbox function to optimize.

    This is a complex function that wraps the true parameter setting and training
    in subprocess calls to allennlp.
    """
    base_config = json.loads(_jsonnet.evaluate_file(args.base_config_path))
    search_config = json.loads(_jsonnet.evaluate_file(args.search_config_path))
    arg_overrides = parse_overrides(args.overrides)

    # Flatten configs and get shorthand mappings
    flat_base_config = flatten(base_config)
    flat_search_config = flatten(search_config)
    shorthands = get_shorthands(flat_search_config)

    # Extract any variable dimensions and the mapping to their keys
    search_space = extract_search_space(flat_search_config)
    lambdas = extract_lambdas(flat_search_config)
    dimensions = list(search_space.values())

    # We no longer use the base config as an initial point because the base config
    # needs to be minimal -- cannot contain fields which aren't used by certain hp
    # configurations since overrides cannot "delete" a field in the base config.
    x0 = None  # get_x0(flat_base_config, search_space)

    trial_num = 0
    trial_paths = dict()

    # Construct f
    def f(x):
        nonlocal trial_num
        nonlocal trial_paths

        # Map the x to the config keys that need to be updated
        newx = []
        for d,p in zip(dimensions, x):
            print(d.name, d, p, type(p))
            if 'numpy' in str(type(p)):
                p = p.item()
            newx.append(p)
        x = newx
        overrides = skopt.utils.point_asdict(search_space, x)
        overrides = fill_search_constants(overrides, flat_search_config)
        overrides = restrict_type_overrides(overrides, flat_search_config)

        # print(f'Overrides after fill and restrict: {json.dumps(overrides, indent=2)}')

        # Construct the trial serialization path
        trial_str = construct_trial_name(overrides, shorthands, trial_num)
        trial_path = os.path.join(args.serialization_dir, trial_str)
        trial_paths[trial_num] = trial_path

        # Construct the overrides string
        processed_overrides = format_overrides(overrides, lambdas, base_config, arg_overrides)
        print(f'Sampled config: {json.dumps(processed_overrides, indent=2)}')
        override_str = json.dumps(processed_overrides, indent=None)

        # Run Allennlp train subprocess
        cmd = f"allennlp train {args.base_config_path} -f -s {trial_path} -o '{override_str}' --file-friendly-logging --include-package {args.include_package}"
        print(f'CMD: {cmd}')
        try:
            subprocess.check_call(cmd, shell=True)
        except Exception as e:
            logger.error(e, exc_info=True)
            raise e

        trial_num += 1

        # Retrieve the best validation metric and return that value
        metrics = json.load(open(os.path.join(trial_path, 'metrics.json')))
        validation_metric = base_config['trainer']['validation_metric']
        negate = validation_metric.startswith('+')
        validation_metric = validation_metric.lstrip('+-')
        y = metrics[f'best_validation_{validation_metric}']
        if negate:
            y = -y

        return y

    # Construct a callback which maintains only the best weights/archive
    def delete_worse_files_cb(results):
        """ Remove .th and .gz files for any trials that aren't the best so far.
        """
        nonlocal trial_num
        nonlocal trial_paths
        logger.info(f'DELETE WORSE FILES, trial num:{trial_num}')

        best_trial_num = np.argmin(results.func_vals).item()
        logger.info(f'Func values: {results.func_vals},  best is {best_trial_num} with path {trial_paths[best_trial_num]}')
        for i in range(trial_num):
            if i != best_trial_num:
                logger.info(f'Deleting .th and .gz files at {trial_paths[i]}')
                th_path = os.path.join(trial_paths[i], '*.th')
                gz_path = os.path.join(trial_paths[i], '*.gz')
                cmd = f"rm -f {th_path} && rm -f {gz_path}"
                try:
                    subprocess.check_call(cmd, shell=True)
                except Exception as e:
                    logger.error(e, exc_info=True)
                    raise e

    return f, dimensions, x0, trial_paths, delete_worse_files_cb
Example 15
import argparse
import os

import psutil
import _jsonnet
from cv2 import cv2
from tqdm import tqdm
from plate_generator import PlateGenerator
from transformations import perspective_transform
from asset_manager import AssetManager
from pascal_voc import bounding_rects_to_xml

project_config_path = 'project_configurations.jsonnet'
project_config = _jsonnet.evaluate_file(project_config_path)
assets = AssetManager(project_config)
parser = argparse.ArgumentParser(description='Reading input arguments.')
parser.add_argument('--num_out_img',
                    default=assets.generator_config['num_out_img'],
                    type=int)
parser.add_argument('--output_directory',
                    default=assets.generator_config['output_directory'],
                    type=str)
parser.add_argument('--img_per_package',
                    default=assets.generator_config['img_per_package'],
                    type=int)
parser.add_argument('--apply_misc_noise',
                    default=assets.generator_config['apply_misc_noise'],
                    type=bool)
parser.add_argument('--apply_dirt',
                    default=assets.generator_config['apply_dirt'],
                    type=bool)
Example 16
                        help="End sentence for interpolation")
    parser.add_argument("--num-steps",
                        type=int,
                        default=8,
                        metavar="N",
                        help="Number of interpolation steps (default: 10)")
    parser.add_argument("--interpolation-type",
                        type=str,
                        default='lerp',
                        metavar="TYPE",
                        help="Interpolation type")
    args = parser.parse_args()

    # Load model
    model_dir = Path(args.model_path)
    config = json.loads(evaluate_file(str(model_dir / 'config.jsonnet')))
    params = Params(config)

    with (model_dir / 'TEXT.Field').open("rb") as fp:
        TEXT: Field = dill.load(fp)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = RecurrentVAE(vocab=TEXT.vocab, params=params.pop('model'))
    model.load_state_dict(torch.load(model_dir / 'vae.pt'))
    model.greedy = True
    model.to(device)
    model.eval()

    # Prepare data
    if args.start_sentence is None or args.end_sentence is None:
        z_1 = np.random.randn(1, model.latent_dim)
Example 17
def load_data(config_file):
    config = json.loads(_jsonnet.evaluate_file(config_file))["data"]
    config["max_num_var"] = 1 << 30
    dataset = Dataset(config["test_file"], config)
    return dataset
Example 18
    parser.add_argument("--output-spider",
                        help="Path to save outputs in the Spider format")
    parser.add_argument("--output-google",
                        help="Path to save output in the Google format")
    args = parser.parse_args()

    if args.output_spider is None and args.output_google is None:
        raise ValueError(
            "specify output destination in either Google or Michigan format")

    config_path = find_any_config(
        args.logdir) if args.config is None else args.config
    api = DuoratAPI(args.logdir, config_path)

    data_config = json.loads(
        _jsonnet.evaluate_file(args.data_config,
                               tla_codes={'prefix': '"data/"'}))
    if data_config['name'] != 'spider':
        raise ValueError()
    del data_config['name']
    if args.questions:
        data_config['paths'] = [args.questions]
    dataset = SpiderDataset(**data_config)

    sql_schemas = {}
    for db_id in dataset.schemas:
        spider_schema = dataset.schemas[db_id]
        sql_schemas[db_id] = preprocess_schema_uncached(
            schema=spider_schema,
            db_path=dataset.get_db_path(db_id),
            tokenize=api.preproc._schema_tokenize,
        )
Example 19
def load_material_args(material_name: str) -> dict:
    """
    Load mujoco args related to given material.
    """
    material_path = os.path.join(MATERIAL_DIR, f"{material_name}.jsonnet")
    return json.loads(_jsonnet.evaluate_file(material_path))
Example 20
def load_jsonnet(path: str) -> tp.Dict[str, tp.Any]:
    ext_vars = _environment_variables()
    jsondict = json.loads(evaluate_file(str(path), ext_vars=ext_vars))
    return tp.cast(tp.Dict[str, tp.Any], jsondict)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--beam-size', type=int, default=1)
    script_args = parser.parse_args()

    for (bs, lr, end_lr), att in itertools.product(
        ((50, 1e-3, 0),
         #(100, 1e-3, 0),
         #(10, 5e-4, 0),
         #(10, 2.5e-4, 0),
         #(10, 1e-3, 5e-4),
         #(10, 1e-3, 2.5e-4),
         ),
        (0, 1, 2)):
        steps = list(range(1100, 40000, 1000)) + [40000]
        args = "{{bs: {bs}, lr: {lr}, end_lr: {end_lr}, att: {att}}}".format(
            bs=bs,
            lr=lr,
            end_lr=end_lr,
            att=att,
        )
        config = json.loads(
            _jsonnet.evaluate_file(
                'configs/spider-20190205/nl2code-0428-stability.jsonnet',
                tla_codes={'args': args}))
        logdir = os.path.join('logdirs/20190428-stability',
                              config['model_name'])

        for step in steps:
            if not os.path.exists(
                    os.path.join(logdir,
                                 'model_checkpoint-{:08d}'.format(step))):
                continue

            if os.path.exists(
                    os.path.join(
                        logdir, 'eval-val-step{:05d}-bs{}.jsonl'.format(
                            step, script_args.beam_size))):
                continue

            infer_command = ((
                'python infer.py --config configs/spider-20190205/nl2code-0428-stability.jsonnet '
                '--logdir logdirs/20190428-stability '
                '--config-args "{args}" '
                '--output __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                +
                '--step {step} --section val --beam-size {beam_size}').format(
                    step=step,
                    args=args,
                    beam_size=script_args.beam_size,
                ))

            eval_command = ((
                'python eval.py --config configs/spider-20190205/nl2code-0428-stability.jsonnet '
                '--logdir logdirs/20190428-stability '
                '--config-args "{args}" '
                '--inferred __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                +
                '--output __LOGDIR__/eval-val-step{step:05d}-bs{beam_size}.jsonl '
                + '--section val').format(
                    step=step,
                    args=args,
                    beam_size=script_args.beam_size,
                ))

            print('{} && {}'.format(infer_command, eval_command))
Example 22
def jsonnet_file(file_path, **kwargs):
    """
    Evaluate the Jsonnet file at file_path.
    kwargs are documented at http://jsonnet.org/implementation/bindings.html
    """
    return jsonnet.evaluate_file(file_path, **kwargs)
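A hedged usage sketch of the wrapper above; the file name and values are hypothetical, and ext_vars/max_trace are among the keyword arguments documented for the bindings:

# Hypothetical call; the keyword arguments are forwarded unchanged to
# jsonnet.evaluate_file.
json_str = jsonnet_file(
    "deployment.jsonnet",
    ext_vars={"ENV": "staging"},
    max_trace=20,
)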
Example 23
import _jsonnet
import sys

if len(sys.argv) != 3:
    print('Usage:\npython conf.py [INPUT].jsonnet [OUTPUT].json')
    sys.exit(0)

f_inp = sys.argv[1]
f_out = sys.argv[2]

js = _jsonnet.evaluate_file(f_inp)
with open(f_out, 'w') as fp:
    fp.write(js)
Example 24
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('mode', help="preprocess/train/eval", choices=["preprocess", "train", "eval"])
    parser.add_argument('exp_config_file', help="jsonnet file for experiments")
    parser.add_argument('--use_scheduler', help='whether to use lr_scheduler for training.')
    parser.add_argument('--model_config_args', help="optional overrides for model config args")
    parser.add_argument('--logdir', help="optional override for logdir")
    args = parser.parse_args()

    exp_config = json.loads(_jsonnet.evaluate_file(args.exp_config_file))
    model_config_file = exp_config["model_config"]
    if "model_config_args" in exp_config:
        model_config_args = exp_config["model_config_args"]
        if args.model_config_args is not None:
            model_config_args_json = _jsonnet.evaluate_snippet("", args.model_config_args)
            model_config_args.update(json.loads(model_config_args_json))
        model_config_args = json.dumps(model_config_args)
    elif args.model_config_args is not None:
        model_config_args = _jsonnet.evaluate_snippet("", args.model_config_args)
    else:
        model_config_args = None

    logdir = args.logdir or exp_config["logdir"]

    trainset = exp_config["trainset"]
    valset = exp_config["valset"]

    if args.mode == "preprocess":
        preprocess_config = PreprocessConfig(model_config_file, model_config_args)
        preprocess.main(preprocess_config)
    elif args.mode == "train":
        train_config = TrainConfig(model_config_file,
                                   model_config_args, logdir, trainset, valset)
        if args.use_scheduler is None or args.use_scheduler == 'True':
            train.main(train_config)
        elif args.use_scheduler == 'False':
            train_noscheduler.main(train_config)
        else:
            print("use_scheduler not specified!")
    elif args.mode == "eval":
        for step in exp_config["eval_steps"]:
            infer_output_path = f"{exp_config['eval_output']}/{exp_config['eval_name']}-step{step}.infer"
            infer_config = InferConfig(
                model_config_file,
                model_config_args,
                logdir,
                exp_config["eval_section"],
                exp_config["eval_beam_size"],
                infer_output_path,
                step,
                use_heuristic=exp_config["eval_use_heuristic"]
            )
            infer.main(infer_config)

            eval_output_path = f"{exp_config['eval_output']}/{exp_config['eval_name']}-step{step}.eval"
            eval_config = EvalConfig(
                model_config_file,
                model_config_args,
                logdir,
                exp_config["eval_section"],
                infer_output_path,
                eval_output_path
            )
            eval.main(eval_config)

            res_json = json.load(open(eval_output_path))
            print(step, res_json['total_scores']['all']['exact'])
Example 25
def train(args):
    work_dir = args['--work-dir']
    config = json.loads(_jsonnet.evaluate_file(args['CONFIG_FILE']))
    config['work_dir'] = work_dir

    if not os.path.exists(work_dir):
        print(f'creating work dir [{work_dir}]', file=sys.stderr)
        os.makedirs(work_dir)

    if args['--extra-config']:
        extra_config = args['--extra-config']
        extra_config = json.loads(extra_config)
        config = util.update(config, extra_config)

    json.dump(config,
              open(os.path.join(work_dir, 'config.json'), 'w'),
              indent=2)

    model = RenamingModel.build(config)
    config = model.config
    model.train()

    if args['--cuda']:
        model = model.cuda()

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=0.001)
    nn_util.glorot_init(params)

    # set the padding index for embedding layers to zeros
    # model.encoder.var_node_name_embedding.weight[0].fill_(0.)

    train_set = Dataset(config['data']['train_file'])
    dev_set = Dataset(config['data']['dev_file'])
    batch_size = config['train']['batch_size']

    print(f'Training set size {len(train_set)}, dev set size {len(dev_set)}',
          file=sys.stderr)

    # training loop
    train_iter = epoch = cum_examples = 0
    log_every = config['train']['log_every']
    evaluate_every_nepoch = config['train']['evaluate_every_nepoch']
    max_epoch = config['train']['max_epoch']
    max_patience = config['train']['patience']
    cum_loss = 0.
    patience = 0.
    t_log = time.time()

    history_accs = []
    while True:
        # load training dataset, which is a collection of ASTs and maps of gold-standard renamings
        train_set_iter = train_set.batch_iterator(
            batch_size=batch_size,
            return_examples=False,
            config=config,
            progress=True,
            train=True,
            num_readers=config['train']['num_readers'],
            num_batchers=config['train']['num_batchers'])
        epoch += 1

        for batch in train_set_iter:
            train_iter += 1
            optimizer.zero_grad()

            # t1 = time.time()
            nn_util.to(batch.tensor_dict, model.device)
            # print(f'[Learner] {time.time() - t1}s took for moving tensors to device', file=sys.stderr)

            # t1 = time.time()
            result = model(batch.tensor_dict,
                           batch.tensor_dict['prediction_target'])
            # print(f'[Learner] batch {train_iter}, {batch.size} examples took {time.time() - t1:4f}s', file=sys.stderr)

            loss = -result['batch_log_prob'].mean()

            cum_loss += loss.item() * batch.size
            cum_examples += batch.size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(params, 5.)

            optimizer.step()
            del loss

            if train_iter % log_every == 0:
                print(
                    f'[Learner] train_iter={train_iter} avg. loss={cum_loss / cum_examples}, '
                    f'{cum_examples} examples ({cum_examples / (time.time() - t_log)} examples/s)',
                    file=sys.stderr)

                cum_loss = cum_examples = 0.
                t_log = time.time()

        print(f'[Learner] Epoch {epoch} finished', file=sys.stderr)

        if epoch % evaluate_every_nepoch == 0:
            print(f'[Learner] Perform evaluation', file=sys.stderr)
            t1 = time.time()
            # ppl = Evaluator.evaluate_ppl(model, dev_set, config, predicate=lambda e: not e['function_body_in_train'])
            eval_results = Evaluator.decode_and_evaluate(
                model, dev_set, config)
            # print(f'[Learner] Evaluation result ppl={ppl} (took {time.time() - t1}s)', file=sys.stderr)
            print(
                f'[Learner] Evaluation result {eval_results} (took {time.time() - t1}s)',
                file=sys.stderr)
            dev_metric = eval_results['func_body_not_in_train_acc']['accuracy']
            # dev_metric = -ppl
            if len(history_accs) == 0 or dev_metric > max(history_accs):
                patience = 0
                model_save_path = os.path.join(work_dir, f'model.bin')
                model.save(model_save_path)
                print(
                    f'[Learner] Saved currently the best model to {model_save_path}',
                    file=sys.stderr)
            else:
                patience += 1
                if patience == max_patience:
                    print(
                        f'[Learner] Reached max patience {max_patience}, exiting...',
                        file=sys.stderr)
                    patience = 0
                    exit()

            history_accs.append(dev_metric)

        if epoch == max_epoch:
            print(f'[Learner] Reached max epoch', file=sys.stderr)
            exit()

        t1 = time.time()
Example 26
 def load_jsonnet(p):
     import _jsonnet
     return json.loads(_jsonnet.evaluate_file(p))
Example 27
USE_TZ = True

TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'

# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/2.0/howto/static-files/

STATIC_URL = '/static/'

# number of bits for the key, all auths should use the same number of bits
KEYBITS = 256

# Versioning
ALLOWED_VERSIONS = ['v1', 'v2']
DEFAULT_VERSION = 'v1'

try:
    from local_settings import *
except ImportError:
    print("local_settings.py not found")

# loading jsonnet config
if os.path.exists("config.jsonnet"):
    import json
    from _jsonnet import evaluate_file
    config = json.loads(evaluate_file("config.jsonnet"))
    for k, v in config.items():
        vars()[k] = v

INSTALLED_APPS = INSTALLED_APPS + MODULES
Example 28
import os
import sys

import _jsonnet

if len(sys.argv) != 2:
    raise Exception('Usage: <filename>')


#  Returns content if worked, None if file not found, or throws an exception
def try_path(dir, rel):
    if not rel:
        raise RuntimeError('Got invalid filename (empty string).')
    if rel[0] == '/':
        full_path = rel
    else:
        full_path = dir + rel
    if full_path[-1] == '/':
        raise RuntimeError('Attempted to import a directory')

    if not os.path.isfile(full_path):
        return full_path, None
    with open(full_path) as f:
        return full_path, f.read()


def import_callback(dir, rel):
    full_path, content = try_path(dir, rel)
    if content:
        return full_path, content
    raise RuntimeError('File not found')


sys.stdout.write(
    _jsonnet.evaluate_file(sys.argv[1], import_callback=import_callback))
Example 29
        full_path = dir + rel
    if full_path[-1] == '/':
        raise RuntimeError('Attempted to import a directory')

    if not os.path.isfile(full_path):
        return full_path, None
    with open(full_path) as f:
        return full_path, f.read()


def import_callback(dir, rel):
    full_path, content = try_path(dir, rel)
    if content:
        return full_path, content
    raise RuntimeError('File not found')

# Test native extensions
def concat(a, b):
    return a + b

native_callbacks = {
  'concat': (('a', 'b'), concat),
}

json_str = _jsonnet.evaluate_file(
    sys.argv[1],
    import_callback=import_callback,
    native_callbacks=native_callbacks,
)
sys.stdout.write(json_str)
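For context, a minimal sketch of how the native callback registered above (concat) is reached from the Jsonnet side via std.native; the inline snippet is illustrative rather than part of the original script:

# Hypothetical round trip using the native_callbacks dict defined above.
snippet = "{ greeting: std.native('concat')('hello ', 'world') }"
sys.stdout.write(_jsonnet.evaluate_snippet(
    "inline.jsonnet", snippet, native_callbacks=native_callbacks))
# prints {"greeting": "hello world"} as formatted JSON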
Example 30
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--logdir', required=True)
    parser.add_argument('--config', required=True)
    parser.add_argument('--config-args')
    args = parser.parse_args()

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    if args.config_args:
        config = json.loads(_jsonnet.evaluate_file(args.config, tla_codes={'args': args.config_args}))
    else:
        config = json.loads(_jsonnet.evaluate_file(args.config))

    if 'model_name' in config:
        args.logdir = os.path.join(args.logdir, config['model_name'])
    train_config = registry.instantiate(TrainConfig, config['train'])

    reopen_to_flush = config.get('log', {}).get('reopen_to_flush')
    logger = Logger(os.path.join(args.logdir, 'log.txt'), reopen_to_flush)
    with open(os.path.join(args.logdir,
          'config-{}.json'.format(
            datetime.datetime.now().strftime('%Y%m%dT%H%M%S%Z'))), 'w') as f:
        json.dump(config, f, sort_keys=True, indent=4)
    logger.log('Logging to {}'.format(args.logdir))

    init_random = random_state.RandomContext(train_config.init_seed)
    data_random = random_state.RandomContext(train_config.data_seed)
    model_random = random_state.RandomContext(train_config.model_seed)

    with init_random:
        # 0. Construct preprocessors
        model_preproc = registry.instantiate(
            registry.lookup('model', config['model']).Preproc,
            config['model'],
            unused_keys=('name',))
        model_preproc.load()

        # 1. Construct model
        model = registry.construct('model', config['model'],
                unused_keys=('encoder_preproc', 'decoder_preproc'), preproc=model_preproc, device=device)
        model.to(device)

        optimizer = registry.construct('optimizer', config['optimizer'], params=model.parameters())
        lr_scheduler = registry.construct(
                'lr_scheduler',
                config.get('lr_scheduler', {'name': 'noop'}),
                optimizer=optimizer)

    # 2. Restore its parameters
    saver = saver_mod.Saver(
        model, optimizer, keep_every_n=train_config.keep_every_n)
    last_step = saver.restore(args.logdir)

    # 3. Get training data somewhere
    with data_random:
        train_data = model_preproc.dataset('train')
        train_data_loader = yield_batches_from_epochs(
            torch.utils.data.DataLoader(
                train_data,
                batch_size=train_config.batch_size,
                shuffle=True,
                drop_last=True,
                collate_fn=lambda x: x))
    train_eval_data_loader = torch.utils.data.DataLoader(
            train_data,
            batch_size=train_config.eval_batch_size,
            collate_fn=lambda x: x)

    val_data = model_preproc.dataset('val')
    val_data_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=train_config.eval_batch_size,
            collate_fn=lambda x: x)

    # 4. Start training loop
    with data_random:
        for batch in train_data_loader:
            # Quit if too long
            if last_step >= train_config.max_steps:
                break

            # Evaluate model
            if last_step % train_config.eval_every_n == 0:
                if train_config.eval_on_train:
                    eval_model(logger, model, last_step, train_eval_data_loader, 'train', num_eval_items=train_config.num_eval_items)
                if train_config.eval_on_val:
                    eval_model(logger, model, last_step, val_data_loader, 'val', num_eval_items=train_config.num_eval_items)

            # Compute and apply gradient
            with model_random:
                optimizer.zero_grad()
                loss = model.compute_loss(batch)
                loss.backward()
                lr_scheduler.update_lr(last_step)
                optimizer.step()

            # Report metrics
            if last_step % train_config.report_every_n == 0:
                logger.log('Step {}: loss={:.4f}'.format(last_step, loss.item()))

            last_step += 1
            # Run saver
            if last_step % train_config.save_every_n == 0:
                saver.save(args.logdir, last_step)
Example 31
                                    mtest.ids_lengths: ids_lengths,
                                    mtest.labels: labels,
                                }))

        preds = preds[:batch_size_original]
        return preds


parser = argparse.ArgumentParser()
parser.add_argument('--dataset', required=True)
parser.add_argument('--load-model')
parser.add_argument('--config')
parser.add_argument('--input')
args = parser.parse_args()
input_str = args.input
data = pickle.load(open(args.dataset, 'rb'))
word_ids = data['word_ids']
labelers = data['labelers']

pretty_config_str = _jsonnet.evaluate_file(args.config)

config = json.loads(pretty_config_str)
ifttt_train = IftttTrain(args, config)
with tf.variable_scope('model', reuse=None, initializer=None):
    m = ifttt_train.create_model(is_training=False)

saver = tf.train.Saver(max_to_keep=0)
with tf.Session() as sess:
    saver.restore(sess, args.load_model)
    app.run()
Example 32
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import _jsonnet

if len(sys.argv) != 2:
    raise Exception("Usage: <filename>")

sys.stdout.write(_jsonnet.evaluate_file(sys.argv[1]))
Example 33
import sys
import _jsonnet
import json
from kpm.template_filters import jsonnet_callbacks
from kpm.render_jsonnet import RenderJsonnet

#r = RenderJsonnet()
#result = r.render_jsonnet(open(sys.argv[1]).read())
def native_bool(b):
    return ['true', True, False, 1, 0]

json_str = _jsonnet.evaluate_file(
    sys.argv[1],
    native_callbacks={"nativeBool": (("bool",), native_bool)},
)

sys.stdout.write(json_str)
#sys.stdout.write(json.dumps(result))
Example 34
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', required=True)
    parser.add_argument('--config-args')
    args = parser.parse_args()

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    if args.config_args:
        config = json.loads(
            _jsonnet.evaluate_file(args.config,
                                   tla_codes={'args': args.config_args}))
    else:
        config = json.loads(_jsonnet.evaluate_file(args.config))

    # 0. Construct preprocessors
    model_preproc = registry.instantiate(registry.lookup(
        'model', config['model']).Preproc,
                                         config['model'],
                                         unused_keys=('name', ))
    model_preproc.load()

    # 1. Construct model
    model = registry.construct('model',
                               config['model'],
                               unused_keys=('encoder_preproc',
                                            'decoder_preproc'),
                               preproc=model_preproc,
                               device=device)
    model.to(device)
    model.eval()

    # 3. Get training data somewhere
    train_data = model_preproc.dataset('train')
    train_eval_data_loader = torch.utils.data.DataLoader(
        train_data, batch_size=10, collate_fn=lambda x: x)

    batch = next(iter(train_eval_data_loader))
    descs = [x for x, y in batch]

    q0, qb = test_enc_equal([descs[0]['question']],
                            [[desc['question']] for desc in descs],
                            model.encoder.question_encoder)

    c0, cb = test_enc_equal(descs[0]['columns'],
                            [desc['columns'] for desc in descs],
                            model.encoder.column_encoder)

    t0, tb = test_enc_equal(descs[0]['tables'],
                            [desc['tables'] for desc in descs],
                            model.encoder.table_encoder)

    q0_enc, c0_enc, t0_enc = model.encoder.encs_update.forward_unbatched(
        descs[0], q0[0], c0[0], c0[1], t0[0], t0[1])
    qb_enc, cb_enc, tb_enc = model.encoder.encs_update.forward(
        descs, qb[0], cb[0], cb[1], tb[0], tb[1])

    check_close(q0_enc.squeeze(1), qb_enc.select(0))
    check_close(c0_enc.squeeze(1), cb_enc.select(0))
    check_close(t0_enc.squeeze(1), tb_enc.select(0))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--beam-size', type=int, default=1)
    script_args = parser.parse_args()

    for (glove, upd_type, num_layers), att in itertools.product((
        (False, 'full', 4),
        (True, 'no_subtypes', 4),
        (True, 'merge_types', 4),
        (True, 'full', 2),
        (True, 'full', 0),
    ), (0, 1, 2)):
        steps = list(range(1100, 40000, 1000)) + [40000]
        args = '{{glove: {glove}, upd_type: \'{upd_type}\', num_layers: {num_layers}, att: {att}}}'.format(
            glove='true' if glove else 'false',
            upd_type=upd_type,
            num_layers=num_layers,
            att=att)
        config = json.loads(
            _jsonnet.evaluate_file(
                'configs/spider-20190205/nl2code-0521-ablations.jsonnet',
                tla_codes={'args': args}))
        logdir = os.path.join('logdirs/20190521-ablations',
                              config['model_name'])

        for step in steps:
            if not os.path.exists(
                    os.path.join(logdir,
                                 'model_checkpoint-{:08d}'.format(step))):
                continue

            if os.path.exists(
                    os.path.join(
                        logdir, 'eval-val-step{:05d}-bs{}.jsonl'.format(
                            step, script_args.beam_size))):
                continue

            infer_command = ((
                'python infer.py '
                '--config configs/spider-20190205/nl2code-0521-ablations.jsonnet '
                '--logdir logdirs/20190521-ablations '
                '--config-args "{args}" ' +
                '--output __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                +
                '--step {step} --section val --beam-size {beam_size}').format(
                    args=args,
                    step=step,
                    beam_size=script_args.beam_size,
                ))

            eval_command = ((
                'python eval.py --config configs/spider-20190205/nl2code-0521-ablations.jsonnet '
                + '--logdir logdirs/20190521-ablations ' +
                '--config-args "{args}" ' +
                '--inferred __LOGDIR__/infer-val-step{step:05d}-bs{beam_size}.jsonl '
                +
                '--output __LOGDIR__/eval-val-step{step:05d}-bs{beam_size}.jsonl '
                + '--section val').format(
                    args=args,
                    step=step,
                    beam_size=script_args.beam_size,
                ))

            print('{} && {}'.format(infer_command, eval_command))
Example 36
import os
import sys

import _jsonnet

if len(sys.argv) != 2:
    raise Exception('Usage: <filename>')

#  Returns content if worked, None if file not found, or throws an exception
def try_path(dir, rel):
    if not rel:
        raise RuntimeError('Got invalid filename (empty string).')
    if rel[0] == '/':
        full_path = rel
    else:
        full_path = dir + rel
    if full_path[-1] == '/':
        raise RuntimeError('Attempted to import a directory')

    if not os.path.isfile(full_path):
        return full_path, None
    with open(full_path) as f:
        return full_path, f.read()


def import_callback(dir, rel):
    full_path, content = try_path(dir, rel)
    if content:
        return full_path, content
    raise RuntimeError('File not found')

sys.stdout.write(_jsonnet.evaluate_file(sys.argv[1], import_callback=import_callback))
def process_cm_binary_data(name,
                           data,
                           main_jsonnet,
                           ext_libs=[],
                           user_args={}):
    """Process binary_data field from jsonnet configMap.

    Extracts folder, evaluates main_jsonnet file from folder
    and parses it to separate json objects.
    main_jsonnet should generate all jsons in one json file.

    Args:
        name (str): Config map name.
        data (dict): Binary data from configMap labeled as jsonnet code.
            It should be base64 encoded jsonnet folder (archive).
        main_jsonnet (str): Path in extracted folder to jsonnet file
            that will be evaluated.
        ext_libs (:obj:`list of str`, optional): List of paths to
            external jsonnet libs.
        user_args (:obj:`dict`, optional): Keyword arguments to jsonnet build function.

    Returns:
        list of (str, dict): Generated json data.

    Raises:
        JsonnetConfigMapError: Raised if jsonnet evaluation fails or
            wrong archive format is provided.
    """
    tmp_folder_name = f"jsonnet_archive_{name}"
    tmp_file_name = f"generated_from_archive_{name}.json"

    jsons = []
    for dataKey in data.keys():
        filename, extension = os.path.splitext(dataKey)

        if extension not in [
                ".gz",
                ".tar",
                ".zip",
                ".bz2",
                ".7z",
                ".tgz",
                ".rar",
                ".xz",
        ]:
            log.error(f"Unsupported archive format: {dataKey}")
            raise JsonnetConfigMapError

        archive_data = data[dataKey]
        utils.extract_archive_data(archive_data, dataKey, tmp_folder_name)

        jsonnet_filepath = os.path.join(tmp_folder_name, main_jsonnet)
        try:
            json_ = _jsonnet.evaluate_file(jsonnet_filepath,
                                           jpathdir=ext_libs,
                                           **user_args)
        except RuntimeError as e:
            log.error(
                f"{main_jsonnet} is not a valid jsonnet, raised error: {e}")
            utils.remove_folder(tmp_folder_name)
            raise JsonnetConfigMapError
        else:
            utils.save_text_to_file("./", tmp_file_name, json_)
            dashboards = parse_json_with_files(tmp_file_name)
            jsons.extend(dashboards)

            utils.remove_file("./", tmp_file_name)
            utils.remove_folder(tmp_folder_name)

    return jsons
Example 38
if __name__ == '__main__':
    parser = ArgumentParser(description="Training of Sentence VAE")
    parser.add_argument("--config", type=str, required=True, metavar='PATH',
                        help="Path to a configuration file.")
    parser.add_argument("--hyper-parameters", type=str, metavar='PATH',
                        help="Path to a hyper parameters file.")
    parser.add_argument("--run-dir", type=str, required=True, metavar='PATH',
                        help="Path to a directory where model checkpoints will be stored.")
    parser.add_argument("--force", action='store_true',
                        help="Whether to rewrite data if run directory already exists.")
    parser.add_argument("--experiment-name", type=str, metavar="ID",
                        help="Name of experiment if training process is run under mlflow")
    parser.add_argument("--verbose", action='store_true',
                        help="Verbosity of the training script.")
    args = parser.parse_args()

    if args.experiment_name is not None:
        if args.hyper_parameters is None:
            raise ValueError("You should provide hyper-parameters file to log into mlflow.")
        with open(args.hyper_parameters) as fp:
            h_params = json.load(fp)
        mlflow.set_tracking_uri(args.run_dir)
        mlflow_client = MlflowClient(args.run_dir)
        experiment_id = get_experiment_id(mlflow_client, args.experiment_name)
        tags = get_git_tags(Path.cwd())
        run_experiment(h_params, args.config, mlflow_client, experiment_id, tags=tags, verbose=args.verbose)
    else:
        params = json.loads(evaluate_file(args.config))
        train(args.run_dir, params, args.force, verbose=args.verbose)
Example 39
def load_env(pattern, core_dir=worldgen_path(), envs_dir='examples', xmls_dir='xmls',
             return_args_remaining=False, **kwargs):
    """
    Flexible load of an environment based on `pattern`.
    Passes args to make_env().
    :param pattern: tries to match environment to the pattern.
    :param core_dir: Absolute path to the core code directory for the project containing
        the environments we want to examine. This is usually the top-level git repository
        folder - in the case of the mujoco-worldgen repo, it would be the 'mujoco-worldgen'
        folder.
    :param envs_dir: relative path (from core_dir) to folder containing all environment files.
    :param xmls_dir: relative path (from core_dir) to folder containing all xml files.
    :param return_args_remaining: returns arguments from kwargs that are not used.
    :param kwargs: arguments passed to the environment function.
    :return: mujoco_worldgen.Env
    """
    # Loads environment based on XML.
    env = None
    args_remaining = {}
    if pattern.endswith(".xml"):
        if len(kwargs) > 0:
            print("Not passing any argument to environment, "
                  "because environment is loaded from XML. XML doesn't "
                  "accept any extra input arguments")

        def get_sim(seed):
            model = load_model_from_path_fix_paths(xml_path=pattern)
            return MjSim(model)
        env = Env(get_sim=get_sim)
    # Loads environment based on mjb.
    elif pattern.endswith(".mjb"):
        if len(kwargs) != 0:
            print("Not passing any argument to environment, "
                  "because environment is loaded from MJB. MJB doesn't "
                  "accept any extra input arguments")

        def get_sim(seed):
            model = load_model_from_mjb(pattern)
            return MjSim(model)
        env = Env(get_sim=get_sim)
    # Loads environment from a python file
    elif pattern.endswith("py") and os.path.exists(pattern):
        print("Loading env from the module: %s" % pattern)
        module = run_path(pattern)
        make_env = module["make_env"]
        args_to_pass, args_remaining = extract_matching_arguments(make_env, kwargs)
        env = make_env(**args_to_pass)
    elif pattern.endswith(".jsonnet") and os.path.exists(pattern):
        env_data = json.loads(_jsonnet.evaluate_file(pattern))
        make_env = get_function(env_data['make_env'])
        args_to_pass, args_remaining = extract_matching_arguments(make_env, kwargs)
        env = make_env(**args_to_pass)
    else:
        # If couldn't load based on easy search, then look
        # into predefined subdirectories.
        matching = (glob(join(core_dir, envs_dir, "**", "*.py"), recursive=True) +
                    glob(join(core_dir, xmls_dir, "**", "*.xml"), recursive=True))
        matching = [match for match in matching if match.find(pattern) > -1]
        matching = [match for match in matching if not os.path.basename(match).startswith('test_')]
        assert len(matching) < 2, "Found multiple environments matching %s" % str(matching)
        if len(matching) == 1:
            return load_env(matching[0], return_args_remaining=return_args_remaining, **kwargs)
    if return_args_remaining:
        return env, args_remaining
    else:
        return env