Example 1
def run(config_path, config_string, name):
    runs = RetrieverTrainingRuns(check_commit=False)
    config = Config.from_file(config_path)
    if config_string:
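        # merge the string config on top of the file config; later configs take precedence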
        config = Config.merge([config, Config.from_str(config_string)])
    run = runs.new(config, name)
    run.train()
Example 2
    def default_format(cls, workspace):
        # get experiment name
        path = workspace.root
        name = os.path.split(path)[-1]

        # steps taken
        ckpt_nums = EditTrainingRun._checkpoint_numbers(workspace.checkpoints)
        steps = max(ckpt_nums) if ckpt_nums else 0

        # metadata
        meta = Metadata.from_file(workspace.metadata)
        bleu = meta.get('bleu_valid', None)
        gleu = meta.get('gleu_valid', None)
        ribes = meta.get('ribes_valid', None)
        chrf = meta.get('chrf_valid', None)
        loss = meta.get('loss_valid', None)
        dirty_repo = meta.get('dirty_repo', '?')

        # dataset
        config = Config.from_file(workspace.config)
        dataset = config.dataset.path

        return '{name:10} -- steps: {steps:<10}, loss: {loss:.2f}, dset: {dset:15}, ' \
               'bleu: {bleu:.2f}, gleu: {gleu:.2f}, ribes: {ribes:.2f}, chrf: {chrf:.2f}, ' \
               'dirty_repo: {dirty_repo}'.format(
                   name=name, dset=dataset, steps=steps, loss=loss, bleu=bleu,
                   gleu=gleu, ribes=ribes, chrf=chrf, dirty_repo=dirty_repo)
Example 3
    def __init__(self, debug):
        # load from configuration
        conf = Config.from_file(data.workspace.turk.coherence.config.txt)
        self.title = conf.title
        self.description = conf.description
        self.keywords = conf.keywords
        self.price = conf.price
        self.duration = eval(conf.duration)
        self.approval_delay = eval(conf.approval_delay)

        # store form specification as JSON, to be built automatically on launch
        with open(data.workspace.turk.coherence.form.json) as form_json:
            self.form_json = form_json.read()

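        # in debug mode, post HITs with zero reward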
        price_per_hit = 0.0 if debug else self.price

        quals = standard_quals(debug)

        hit_type_ids = mtc.register_hit_type(
            title=self.title,
            description=self.description,
            reward=price_per_hit,
            duration=self.duration,
            keywords=self.keywords,
            approval_delay=self.approval_delay,
            qual_req=quals)
        hit_type_id = hit_type_ids[0].HITTypeId

        super(CoherenceTask, self).__init__(hit_type_id, mtc)
Example 4
    def __getitem__(self, i):
        """Reload an existing TrainingRun."""
        save_dir = self._int_dirs[i]
        config = Config.from_file(self._config_path(save_dir))
        run = self._run_factory(config, save_dir)
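        # optionally verify that the source tree matches the commit recorded for this run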
        if self._check_commit:
            run.match_commit(self._src_dir)

        logging.info('Reloaded TrainingRun #{}'.format(i))
        return run
Example 5
    def __getitem__(self, i):
        """Reload an existing Experiment."""
        save_dir = self._int_dirs[i]
        config = Config.from_file(self._config_path(save_dir))
        exp = self._exp_factory(config, save_dir)
        if self._check_commit:
            exp.match_commit(self._src_dir)

        logging.info('Reloaded experiment #{}'.format(i))
        return exp
Example 6
def get_metadata(bundle_spec):
    """
    
    Args:
        bundle_spec (BundleSpec)

    Returns:
        Config
    """
    path = 'data/experiments/0_unnamed/metadata.txt'
    with open_cl_file(bundle_spec, path) as f:
        f_path = os.path.realpath(f.name)
        metadata = Config.from_file(f_path)
    return metadata
Example 7
    def new(self, config=None, name=None):
        """Create a new Experiment."""
        if config is None:
            config = Config.from_file(self._default_config_path)

        save_dir = self._int_dirs.new_dir(name=name)
        cfg_path = self._config_path(save_dir)
        config.to_file(cfg_path)  # save the config
        exp = self._exp_factory(config, save_dir)
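        # record the current source commit so the experiment can be reproduced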
        exp.record_commit(self._src_dir)

        logging.info('New experiment created at: {}'.format(
            exp.workspace.root))
        logging.info('Experiment configuration:\n{}'.format(config))
        return exp
Example 8
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('exp_id', nargs='+')
arg_parser.add_argument('-c', '--check_commit', default='strict')
arg_parser.add_argument('-p', '--profile', action='store_true')
args = arg_parser.parse_args()

# create experiment
experiments = EditTrainingRuns(
    check_commit=(args.check_commit == 'disable'))

exp_id = args.exp_id
if exp_id == ['default']:
    # new default experiment
    exp = experiments.new()
elif len(exp_id) == 1 and exp_id[0].isdigit():
    # reload old experiment
    exp = experiments[int(exp_id[0])]
else:
    # new experiment according to configs
    config = Config.from_file(exp_id[0])
    for filename in exp_id[1:]:
        config = Config.merge(config, Config.from_file(filename))
    exp = experiments.new(config)  # new experiment from config

# start training
exp.workspace.add_file('stdout', 'stdout.txt')
exp.workspace.add_file('stderr', 'stderr.txt')

with save_stdout(exp.workspace.root):
    exp.train()
Example 9
import argparse

from gtd.utils import Config
from textmorph.language_model.training_run import LMTrainingRuns

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('config_path')
args = arg_parser.parse_args()

runs = LMTrainingRuns()
config = Config.from_file(args.config_path)
run = runs.new(config)

run.train()
Example 10
import argparse
import ConfigParser  # Python 2 stdlib module (configparser in Python 3)
import os

from gtd.log import set_log_level  # assumed location of set_log_level in this codebase
from gtd.ml.utils import TensorBoardLogger
from gtd.utils import Config

from strongsup.decoder import Decoder
from strongsup.domain import get_domain
from strongsup.embeddings import GloveEmbeddings

set_log_level('DEBUG')

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('exp_id', nargs='+')
args = arg_parser.parse_args()
exp_id = args.exp_id

glove_embeddings = GloveEmbeddings(vocab_size=20000)
config = Config.from_file(exp_id[0])
domain = get_domain(config)
utterance_length = config.parse_model.utterance_embedder.utterance_length
utterance_num = config.parse_model.utterance_embedder.utterance_num
iterations_per_utterance = config.decoder.train_exploration_policy.iterations_per_utterance
tb_logger = TensorBoardLogger(
    os.path.join(os.getcwd(), 'data', 'decomposable', 'tensorboard'))
decomposable_weights_file = os.path.join(os.getcwd(), 'data', 'decomposable',
                                         'decomposable_weights_{}.hdf5')
decomposable_config_file = os.path.join(os.getcwd(), 'data', 'decomposable',
                                        'decomposable_config.ini')

deco_config = ConfigParser.ConfigParser()
deco_config.read(decomposable_config_file)
deco_config = dict(deco_config.items('Params'))
deco_config['lr'] = float(deco_config['lr'])
Example 11
import os

from gtd.utils import Config

from editor_code.copy_editor.edit_training_run import EditTrainingRun
from editor_code.copy_editor.retrieve_edit_run import RetrieveEditTrainingRun
from editor_code.copy_editor.editor import EditExample
from editor_code.copy_editor.vocab import HardCopyDynamicVocab

from gtd.utils import bleu

print(os.environ['COPY_EDIT_DATA'])

# no-profile
profile = False

config_file = 'default.txt'
config = Config.from_file('editor_code/configs/editor/' + config_file)

src_dir_noret = os.environ['COPY_EDIT_DATA'] + '/edit_runs/7'  #for codalab
load_expt_noret = EditTrainingRun(config, src_dir_noret)
src_dir = os.environ['COPY_EDIT_DATA'] + '/edit_runs/7'  #for codalab
load_expt = RetrieveEditTrainingRun(config, src_dir)

###
# retedit model
import numpy as np

ret_model = load_expt.editor.ret_model
# edit_model = load_expt.editor.edit_model # since we only care about the retriever here
examples = load_expt._examples

from gtd.utils import chunks
Example 12
arg_parser.add_argument('-n', '--name', default='unnamed')
arg_parser.add_argument('-r', '--seed', default=0, type=int)
arg_parser.add_argument('-x', '--reward_configs', action='append', default=[])
arg_parser.add_argument('config_paths', nargs='+')
args = arg_parser.parse_args()

random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# create run
runs = HRLTrainingRuns(check_commit=(args.check_commit == 'strict'))

config_paths = args.config_paths
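# a single all-digit argument is treated as the ID of an existing run to clone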
if len(config_paths) == 1 and config_paths[0].isdigit():
    configs = [Config.from_file(p) for p in args.reward_configs]
    run = runs.clone(int(config_paths[0]), configs, args.name)
else:
    # new run according to configs
    configs = [Config.from_file(p) for p in config_paths]

    # merge all configs together
    config = Config.merge(configs)  # later configs overwrite earlier configs
    run = runs.new(config, name=args.name)  # new run from config

    run.metadata['description'] = args.description
    run.metadata['name'] = args.name

run.metadata['host'] = socket.gethostname()

# start training
Example 13
import os

from gtd.utils import Config

from editor_code.copy_editor.edit_training_run import EditTrainingRun
from editor_code.copy_editor.context_vae_training_run import ContextVAETrainingRun
from editor_code.copy_editor.editor import EditExample
from editor_code.copy_editor.vocab import HardCopyDynamicVocab

from gtd.utils import bleu

print(os.environ['COPY_EDIT_DATA'])

# no-profile
profile = False

config = Config.from_file('editor_code/configs/editor/github_original.txt')
src_dir_noret = os.environ['COPY_EDIT_DATA'] + '/edit_runs_base/0'  #for codalab
src_dir_vae = os.environ['COPY_EDIT_DATA'] + '/edit_runs_base/1'  #for codalab
load_expt_noret = EditTrainingRun(config, src_dir_noret)
load_expt_vae = ContextVAETrainingRun(config, src_dir_vae)

import numpy as np
edit_model_noret = load_expt_noret.editor
examples = load_expt_noret._examples

from gtd.utils import chunks
from tqdm import tqdm

eval_num = 500
beam_list_noret, edit_traces_noret = edit_model_noret.edit(
    examples.test[0:eval_num])
Example 14
# create experiment
experiments = EditTrainingRuns(check_commit=(args.check_commit == 'strict'))

exp_id = args.exp_id
if exp_id == ['default']:
    # new default experiment
    exp = experiments.new()
elif len(exp_id) == 1 and exp_id[0].isdigit():
    # reload old experiment
    exp = experiments[int(exp_id[0])]
else:
    # new experiment according to configs
    config = Config.from_file(exp_id[0])
    config.seed = args.seed
    config.num_iter = args.num_iter
    config.eps = args.eps
    config.momentum = args.momentum
    for filename in exp_id[1:]:
        config = Config.merge(config, Config.from_file(filename))
    exp = experiments.new(config)  # new experiment from config

# start training
exp.workspace.add_file('stdout', 'stdout.txt')
exp.workspace.add_file('stderr', 'stderr.txt')

with save_stdout(exp.workspace.root):
    exp.train()
Example 15
exp_id = args.exp_id
exp_mode = args.mode
eval_samples = args.eval_samples

# create experiment
experiments = Experiments(check_commit=(args.check_commit == 'strict'))

if exp_id == ['default']:
    # new default experiment
    exp = experiments.new()
elif len(exp_id) == 1 and exp_id[0].isdigit():
    # reload old experiment
    exp = experiments[int(exp_id[0])]
else:
    # new experiment according to configs
    config = Config.from_file(exp_id[0])
    for filename in exp_id[1:]:
        config = Config.merge(config, Config.from_file(filename))
    exp = experiments.new(config)  # new experiment from config

# add experiment to tracker
if args.tracker:
    exp_type, dataset, seed = ExperimentType.parse_configs(exp_id)
    with TopLevelTracker(args.tracker) as tracker:
        tracker.register_result(
                dataset, exp_type, seed, exp.workspace.root)

################################
# Profiling
# from gtd.chrono import Profiling, Profiler
# profiler = Profiler.default()
Example 16
import paths
import os
#os.environ['COPY_EDIT_DATA']=paths.data_dir
os.environ['COPY_EDIT_DATA'] = './data/'
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
from gtd.utils import Config

from editor_code.copy_editor.edit_training_run import EditTrainingRuns
print(os.environ['COPY_EDIT_DATA'])

#no-profile
profile = False

runs = EditTrainingRuns()
config = Config.from_file('editor_code/configs/editor/github_s2s.txt')
run = runs.new(config)

if profile:
    from gtd.chrono import Profiling, Profiler

    profiler = Profiler.default()

    import editor_code.copy_editor.retriever
    import editor_code.copy_editor.editor
    profiler.add_module(editor_code.copy_editor.editor)
    profiler.add_module(editor_code.copy_editor.retriever)
    Profiling.start()
    run.train()
    Profiler.report(profiler)  # prints out report

else:
    run.train()
Example 17
np.random.seed(args.seed)
torch.manual_seed(args.seed)


# create run
runs = MiniWoBTrainingRuns(check_commit=(args.check_commit == 'strict'))

config_paths = args.config_paths

infer = False
if len(config_paths) == 1 and config_paths[0].isdigit():
    # reload old run
    run = runs[int(config_paths[0])]
else:
    # new run according to configs
    configs = [Config.from_file(p) for p in config_paths]

    # add task config
    repo_dir = abspath(dirname(__file__))
    config_dir = join(repo_dir, 'configs')

    task_config_path = join(config_dir, 'task-mixins', '{}.txt'.format(args.task))
    if os.path.exists(task_config_path):
        # use existing config if it exists
        task_config = Config.from_file(task_config_path)
    else:
        # otherwise, create a very basic config
        task_config = Config.from_str('env.subdomain = {}'.format(args.task))
    configs.append(task_config)

    # add string configs
Example 18
import os

from gtd.utils import Config
from editor_code.copy_editor.edit_training_run import EditTrainingRun
from editor_code.copy_editor.retrieve_edit_run import RetrieveEditTrainingRun
from editor_code.copy_editor.editor import EditExample
from editor_code.copy_editor.vocab import HardCopyDynamicVocab

from gtd.utils import bleu

print(os.environ['COPY_EDIT_DATA'])

# no-profile
profile = False

# config = Config.from_file('./editor_code/configs/editor/old/scifi_200d.txt')
# src_dir = os.environ['COPY_EDIT_DATA']+'/scifi_runs/200d' #for codalab

config = Config.from_file(
    './editor_code/configs/editor/all-sci-fi-data.100d.txt')
src_dir = os.environ['COPY_EDIT_DATA'] + '/scifi_runs/final-all'  #for codalab

#config = Config.from_file('./editor_code/configs/editor/scifi_5tuple.txt')
#src_dir = os.environ['COPY_EDIT_DATA']+'/scifi_runs/5tuple' #for codalab

load_expt = RetrieveEditTrainingRun(config, src_dir)

###
# retedit model
import numpy as np

ret_model = load_expt.editor.ret_model
edit_model = load_expt.editor.edit_model
examples = load_expt._examples
Example 19
import os

from gtd.utils import Config

from editor_code.copy_editor.edit_training_run import EditTrainingRun
from editor_code.copy_editor.retrieve_edit_run import RetrieveEditTrainingRun
from editor_code.copy_editor.editor import EditExample
from editor_code.copy_editor.vocab import HardCopyDynamicVocab

from gtd.utils import bleu

print(os.environ['COPY_EDIT_DATA'])

# no-profile
profile = False

config = Config.from_file('editor_code/configs/editor/scifi_100d.txt')

src_dir = os.environ['COPY_EDIT_DATA'] + '/edit_runs/9'  #for codalab
load_expt = RetrieveEditTrainingRun(config, src_dir)

###
# retedit model
import numpy as np

ret_model = load_expt.editor.ret_model
edit_model = load_expt.editor.edit_model
examples = load_expt._examples

from gtd.utils import chunks
from tqdm import tqdm
Example 20
from gtd.turk import Task, get_mturk_connection, standard_quals
from gtd.utils import Config
from textmorph import data
"""
To review completed HITs:
- Go to: https://requester.mturk.com/mturk/manageHITs

To do a HIT:
- Go to: https://worker.mturk.com/
- Search for "percy liang"
- Click "Accept & Work"
  - For some reason, I had trouble viewing these HITs on Google Chrome (invalid URL parameter error).
  - On Firefox, things are fine.
"""

config = Config.from_file(data.workspace.config)
mtc = get_mturk_connection(config.aws_access_key_id,
                           config.aws_secret_access_key,
                           sandbox=False)


class SimilarityTask(Task):
    def __init__(self, debug):
        # load from configuration
        conf = Config.from_file(data.workspace.turk.similarity.config.txt)
        self.title = conf.title
        self.description = conf.description
        self.keywords = conf.keywords
        self.price = conf.price
        self.duration = eval(conf.duration)
        self.approval_delay = eval(conf.approval_delay)
Example 21
validation_dir = os.environ[
    'COPY_EDIT_DATA'] + '/datasets/card2code/third_party/hearthstone/dev_hs'

output_list = []
with io.open(validation_dir + '.out', 'r', encoding='utf-8') as fopen:
    for line in fopen:
        output_list.append(line.strip())

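# preprocess and tokenize each reference output line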
out_proc = [tok_str(proc_str(out)) for out in output_list]
iin = load_input(validation_dir)
valid_ex = make_eexs(iin, out_proc)

#no-profile
profile = False

config = Config.from_file('editor_code/configs/editor/default.txt')
src_dir = os.environ['COPY_EDIT_DATA'] + '/edit_runs/0'
print('loading model')
print(src_dir)
load_expt = RetrieveEditTrainingRun(config, src_dir)  #highest valid bleu.

import numpy as np

vae_editor = load_expt.editor.vae_model
ret_model = load_expt.editor.ret_model
edit_model = load_expt.editor.edit_model
examples = load_expt._examples

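# embed the training examples and build an LSH index for nearest-neighbor retrieval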
new_vecs = ret_model.batch_embed(examples.train, train_mode=False)
full_lsh = ret_model.make_lsh(new_vecs)
valid_eval = ret_model.ret_and_make_ex(valid_ex,
Example 22
import os

from gtd.utils import Config
from editor_code.copy_editor.edit_training_run import EditTrainingRun
from editor_code.copy_editor.retrieve_edit_run import RetrieveEditTrainingRun
from editor_code.copy_editor.editor import EditExample
from editor_code.copy_editor.vocab import HardCopyDynamicVocab

from gtd.utils import bleu

print(os.environ['COPY_EDIT_DATA'])

# no-profile
profile = False

# config = Config.from_file('./editor_code/configs/editor/old/scifi_200d.txt')
# src_dir = os.environ['COPY_EDIT_DATA']+'/scifi_runs/200d' #for codalab

config = Config.from_file('./editor_code/configs/editor/scifi_5tuple_gen.txt')
src_dir = os.environ['COPY_EDIT_DATA'] + '/scifi_runs/final'  #for codalab

#config = Config.from_file('./editor_code/configs/editor/scifi_5tuple.txt')
#src_dir = os.environ['COPY_EDIT_DATA']+'/scifi_runs/5tuple' #for codalab

load_expt = RetrieveEditTrainingRun(config, src_dir)

###
# retedit model
import numpy as np

ret_model = load_expt.editor.ret_model
edit_model = load_expt.editor.edit_model
examples = load_expt._examples