def test_multiple_models(self, dataset, model, synthesizer):
    """Evaluating a directory with several saved checkpoints writes result files.

    Saves two checkpoint dicts under ``input/model``, runs ``evaluate`` with
    accuracy and BLEU metrics, and checks that both output artifacts exist.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        input_dir = os.path.join(tmpdir, "input")
        output_dir = os.path.join(tmpdir, "output")
        model_dir = os.path.join(input_dir, "model")
        os.makedirs(input_dir)
        os.makedirs(model_dir)
        # Two checkpoints with distinct scores; evaluate must process both.
        for name, score in (("0", 0.5), ("1", 1.0)):
            torch.save(
                {"score": score, "model": {"score": score, "name": "tmp"}},
                os.path.join(model_dir, name),
            )
        metric_map = {
            "accuracy": use_environment(
                Accuracy(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
            "bleu": use_environment(
                Bleu(),
                in_keys=["actual", ["ground_truth", "expected"]],
                value_key="actual",
            ),
        }
        evaluate(input_dir, output_dir, dataset, model, synthesizer, metric_map)
        assert os.path.exists(os.path.join(output_dir, "result.pt"))
        assert os.path.exists(os.path.join(output_dir, "result_metrics.json"))
def test_multiprocess(self):
    """Two worker processes running ``self._run`` produce identical results.

    Spawns two ranks via a process pool, checks both ranks agree, then
    verifies the aggregated metrics and each per-sample ``Result``.
    """
    accuracy = use_environment(
        Accuracy(),
        in_keys=["actual", ["ground_truth", "expected"]],
        value_key="actual",
    )
    dataset = ListDataset([
        Environment(
            {"query": f"query{i}", "ground_truth": "c0"},
            set(["ground_truth"]),
        )
        for i in range(3)
    ])
    with tempfile.TemporaryDirectory() as init_dir:
        with context.Pool(2) as pool:
            handles = [
                pool.apply_async(
                    self._run,
                    args=(init_dir, dataset, {"accuracy": accuracy}, rank),
                )
                for rank in range(2)
            ]
            outputs = [h.get() for h in handles]
    # Every rank must observe the same aggregated result.
    assert outputs[0] == outputs[1]
    results = outputs[0]
    assert results.metrics == {1: {"accuracy": 1.0 / 3},
                               3: {"accuracy": 2.0 / 3}}
    assert 3 == len(results.results)
    # Zero out wall-clock times so Result equality is deterministic.
    for entry in results.results:
        entry.time = 0.0
    results.results.sort(key=lambda x: x.sample["query"])
    expected = [
        Result({"query": "query0", "ground_truth": "c0"}, ["c0", "c1", "c2"],
               {1: {"accuracy": 1.0}, 3: {"accuracy": 1.0}}, True, 0.0),
        Result({"query": "query1", "ground_truth": "c0"}, ["c2", "c3", "c0"],
               {1: {"accuracy": 0.0}, 3: {"accuracy": 1.0}}, True, 0.0),
        Result({"query": "query2", "ground_truth": "c0"}, ["c2", "c3", "c5"],
               {1: {"accuracy": 0.0}, 3: {"accuracy": 0.0}}, True, 0.0),
    ]
    assert expected == results.results
def _run(self, init_dir, input, output, model, synthesizer, dataset, rank):
    """Worker entry point: join the 2-process group, then run evaluation."""
    distributed.initialize(init_dir, rank, 2)
    # Both metrics read the synthesized program under "actual" and the
    # expected program under ("ground_truth", "expected").
    metric_map = {
        name: use_environment(
            metric,
            in_keys=["actual", ["ground_truth", "expected"]],
            value_key="actual",
        )
        for name, metric in (("accuracy", Accuracy()), ("bleu", Bleu()))
    }
    evaluate(input, output, dataset, model, synthesizer, metric_map)
def evaluate(self, qencoder, aencoder, dir):
    """Run evaluation into *dir* and return the loaded ``result.pt``.

    *dir* is used both as the input and the output directory of the
    evaluation entrypoint.
    """
    model = self.prepare_model(qencoder, aencoder)
    metric_map = {
        "accuracy": use_environment(
            metric=Accuracy(),
            in_keys=["actual", ["ground_truth", "expected"]],
            value_key="actual",
        )
    }
    # NOTE(review): `eval` here shadows the builtin — presumably an alias of
    # the project's evaluate entrypoint imported at file level; confirm.
    eval(
        dir,
        dir,
        test_dataset,
        model,
        self.prepare_synthesizer(model, qencoder, aencoder),
        metric_map,
        top_n=[5],
    )
    return torch.load(os.path.join(dir, "result.pt"))
def test_simple_case(self):
    """EvaluateSynthesizer reports top-1 and top-3 accuracy per sample.

    Three queries share the ground truth "c0"; only some candidate lists
    contain it within the first 1 or 3 entries.
    """
    accuracy = use_environment(
        Accuracy(),
        in_keys=["actual", ["ground_truth", "expected"]],
        value_key="actual",
    )
    dataset = ListDataset([
        Environment(
            {"query": f"query{i}", "ground_truth": "c0"},
            set(["ground_truth"]),
        )
        for i in range(3)
    ])
    results = EvaluateSynthesizer(dataset, synthesize,
                                  metrics={"accuracy": accuracy})()
    assert results.metrics == {1: {"accuracy": 1.0 / 3.0},
                               3: {"accuracy": 2.0 / 3.0}}
    assert 3 == len(results.results)
    # Zero out wall-clock times so Result equality is deterministic.
    for entry in results.results:
        entry.time = 0.0
    expected = [
        Result({"query": "query0", "ground_truth": "c0"}, ["c0", "c1", "c2"],
               {1: {"accuracy": 1.0}, 3: {"accuracy": 1.0}}, True, 0.0),
        Result({"query": "query1", "ground_truth": "c0"}, ["c2", "c3", "c0"],
               {1: {"accuracy": 0.0}, 3: {"accuracy": 1.0}}, True, 0.0),
        Result({"query": "query2", "ground_truth": "c0"}, ["c2", "c3", "c5"],
               {1: {"accuracy": 0.0}, 3: {"accuracy": 0.0}}, True, 0.0),
    ]
    assert expected == results.results
def reinforce(self, train_dataset, encoder, output_dir):
    """Run REINFORCE training for the prepared model.

    Builds the collate pipeline, the joint policy/value loss module, and the
    evaluation synthesizer, then delegates to ``train_REINFORCE``.
    Checkpoints/results go to *output_dir*; *tmpdir* holds transient state.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        interpreter = self.interpreter()
        # Batch-collation options per environment key
        # (CollateOptions(use_pad_sequence, dim, padding_value) — presumably; confirm).
        collate = Collate(
            test_case_tensor=CollateOptions(False, 0, 0),
            variables_tensor=CollateOptions(True, 0, 0),
            previous_actions=CollateOptions(True, 0, -1),
            hidden_state=CollateOptions(False, 0, 0),
            state=CollateOptions(False, 0, 0),
            ground_truth_actions=CollateOptions(True, 0, -1),
            reward=CollateOptions(False, 0, 0)
        )
        # Dataset -> episodes -> flattened samples -> model inputs -> batch.
        collate_fn = Sequence(OrderedDict([
            ("to_episode", Map(self.to_episode(encoder, interpreter))),
            ("flatten", Flatten()),
            ("transform", Map(self.transform(
                encoder, interpreter, Parser()))),
            ("collate", collate.collate)
        ]))
        model = self.prepare_model(encoder)
        optimizer = self.prepare_optimizer(model)
        train_REINFORCE(
            output_dir, tmpdir, output_dir,
            train_dataset,
            self.prepare_synthesizer(model, encoder, interpreter),
            model,
            optimizer,
            # Loss: policy loss weighted by reward + a down-weighted value
            # (critic) BCE loss, aggregated into a single scalar "loss".
            torch.nn.Sequential(OrderedDict([
                ("policy", torch.nn.Sequential(OrderedDict([
                    ("loss", Apply(
                        module=mlprogram.nn.action_sequence.Loss(
                            reduction="none"
                        ),
                        in_keys=[
                            "rule_probs",
                            "token_probs",
                            "reference_probs",
                            "ground_truth_actions",
                        ],
                        out_key="action_sequence_loss",
                    )),
                    # REINFORCE: scale each sequence loss by its reward.
                    ("weight_by_reward", Apply(
                        [("reward", "lhs"), ("action_sequence_loss", "rhs")],
                        "action_sequence_loss",
                        mlprogram.nn.Function(Mul())))
                ]))),
                ("value", torch.nn.Sequential(OrderedDict([
                    ("reshape_reward", Apply(
                        [("reward", "x")], "value_loss_target",
                        Reshape([-1, 1]))),
                    ("BCE", Apply(
                        [("value", "input"), ("value_loss_target", "target")],
                        "value_loss",
                        torch.nn.BCELoss(reduction='sum'))),
                    # Down-weight the value loss by 1e-2.
                    ("reweight", Apply(
                        [("value_loss", "lhs")], "value_loss",
                        mlprogram.nn.Function(Mul()),
                        constants={"rhs": 1e-2})),
                ]))),
                ("aggregate", Apply(
                    ["action_sequence_loss", "value_loss"], "loss",
                    AggregatedLoss())),
                # Divide by 1 — effectively a no-op normalization hook.
                ("normalize", Apply(
                    [("loss", "lhs")], "loss",
                    mlprogram.nn.Function(Div()),
                    constants={"rhs": 1})),
                ("pick", mlprogram.nn.Function(
                    Pick("loss")))
            ])),
            # Evaluation uses the non-rollout synthesizer variant.
            EvaluateSynthesizer(
                train_dataset,
                self.prepare_synthesizer(model, encoder, interpreter,
                                         rollout=False),
                {}, top_n=[]),
            "generation_rate",
            # Reward metric: IoU of the produced output vs. the test case,
            # thresholded at 0.9 into a 0/1 float.
            metrics.use_environment(
                metric=metrics.TestCaseResult(
                    interpreter=interpreter,
                    metric=metrics.use_environment(
                        metric=metrics.Iou(),
                        in_keys=["actual", "expected"],
                        value_key="actual",
                    )
                ),
                in_keys=["test_cases", "actual"],
                value_key="actual",
                transform=Threshold(threshold=0.9, dtype="float"),
            ),
            collate_fn,
            1, 1, Epoch(10),
            evaluation_interval=Epoch(10),
            snapshot_interval=Epoch(10),
            use_pretrained_model=True,
            use_pretrained_optimizer=True,
            threshold=1.0)
def prepare_synthesizer(self, model, encoder, interpreter, rollout=True):
    """Build the two-level SMC synthesizer used in tests.

    An inner SMC over action sequences is wrapped by a sequential program
    sampler. With ``rollout=True`` the sampler is filtered by test-case IoU
    and a plain SMC is returned; otherwise the sampler is guided by the value
    network and the synthesizer gets a timeout plus a result filter.
    All RNGs are seeded with 0 for determinism.
    """
    collate = Collate(
        test_case_tensor=CollateOptions(False, 0, 0),
        input_feature=CollateOptions(False, 0, 0),
        test_case_feature=CollateOptions(False, 0, 0),
        reference_features=CollateOptions(True, 0, 0),
        variables_tensor=CollateOptions(True, 0, 0),
        previous_actions=CollateOptions(True, 0, -1),
        hidden_state=CollateOptions(False, 0, 0),
        state=CollateOptions(False, 0, 0),
        ground_truth_actions=CollateOptions(True, 0, -1)
    )
    # Samples action sequences token by token under type constraints.
    subsampler = ActionSequenceSampler(
        encoder,
        IsSubtype(),
        # Turns raw test cases / variables into tensors.
        Sequence(OrderedDict([
            ("tinput", Apply(
                module=TransformInputs(),
                in_keys=["test_cases"],
                out_key="test_case_tensor",
            )),
            ("tvariable", Apply(
                module=TransformVariables(),
                in_keys=["variables", "test_case_tensor"],
                out_key="variables_tensor"
            )),
        ])),
        # Per-step transforms feeding the model (inference mode: train=False).
        Compose(OrderedDict([
            ("add_previous_actions", Apply(
                module=AddPreviousActions(encoder, n_dependent=1),
                in_keys=["action_sequence", "reference"],
                out_key="previous_actions",
                constants={"train": False},
            )),
            ("add_state", AddState("state")),
            ("add_hidden_state", AddState("hidden_state"))
        ])),
        collate,
        model,
        rng=np.random.RandomState(0))
    # Map sampled action sequences back to program text.
    subsampler = mlprogram.samplers.transform(
        subsampler, Parser().unparse
    )
    # Inner SMC: search over single action sequences.
    subsynthesizer = SMC(
        5, 1, subsampler,
        max_try_num=1,
        to_key=Pick("action_sequence"),
        rng=np.random.RandomState(0)
    )
    # Outer sampler: grows multi-statement programs statement by statement.
    sampler = SequentialProgramSampler(
        subsynthesizer,
        Apply(
            module=TransformInputs(),
            in_keys=["test_cases"],
            out_key="test_case_tensor",
        ),
        collate,
        model.encode_input,
        interpreter=interpreter,
        expander=Expander(),
        rng=np.random.RandomState(0))
    if rollout:
        # Keep only samples whose test-case IoU reaches 1.0.
        sampler = FilteredSampler(
            sampler,
            metrics.use_environment(
                metric=metrics.TestCaseResult(
                    interpreter,
                    metric=metrics.use_environment(
                        metric=metrics.Iou(),
                        in_keys=["actual", "expected"],
                        value_key="actual",
                    )
                ),
                in_keys=["test_cases", "actual"],
                value_key="actual"
            ),
            1.0
        )
        return SMC(3, 1, sampler, rng=np.random.RandomState(0),
                   to_key=Pick("interpreter_state"), max_try_num=1)
    else:
        # Guide outer sampling with the model's value (critic) network.
        sampler = SamplerWithValueNetwork(
            sampler,
            Sequence(OrderedDict([
                ("tinput", Apply(
                    module=TransformInputs(),
                    in_keys=["test_cases"],
                    out_key="test_case_tensor",
                )),
                ("tvariable", Apply(
                    module=TransformVariables(),
                    in_keys=["variables", "test_case_tensor"],
                    out_key="variables_tensor"
                )),
            ])),
            collate,
            torch.nn.Sequential(OrderedDict([
                ("encoder", model.encoder),
                ("value", model.value),
                ("pick", mlprogram.nn.Function(
                    Pick("value")))
            ])))
        # Bound each synthesis attempt to 1 second.
        synthesizer = SynthesizerWithTimeout(
            SMC(3, 1, sampler, rng=np.random.RandomState(0),
                to_key=Pick("interpreter_state"), max_try_num=1),
            1
        )
        # Discard results whose test-case IoU is below 1.0.
        return FilteredSynthesizer(
            synthesizer,
            metrics.use_environment(
                metric=metrics.TestCaseResult(
                    interpreter,
                    metric=metrics.use_environment(
                        metric=metrics.Iou(),
                        in_keys=["actual", "expected"],
                        value_key="actual",
                    )
                ),
                in_keys=["test_cases", "actual"],
                value_key="actual"
            ),
            1.0
        )