def test_eval(self):
    """Check the tensor produced by ``AddPreviousActions`` with ``train=False``.

    An ``ActionSequenceEncoder`` is built from the samples of a one-entry
    dataset whose ground truth is ``"y = x + 1"``. The same ground truth is
    converted to an action sequence, passed through ``AddPreviousActions``
    together with a two-token reference, and the resulting tensor is compared
    against a fixed 14-row, 3-column table.
    """
    ground_truth = "y = x + 1"
    env = Environment({"ground_truth": ground_truth}, {"ground_truth"})
    samples = get_samples(ListDataset([env]), MockParser())
    encoder = ActionSequenceEncoder(samples, 0)
    actions = GroundTruthToActionSequence(MockParser())(ground_truth)
    add_previous = AddPreviousActions(encoder)

    reference = [Token(None, "foo", "foo"), Token(None, "bar", "bar")]
    result = add_previous(
        reference=reference,
        action_sequence=actions,
        train=False,
    )

    expected = [
        [2, -1, -1],
        [3, -1, -1],
        [4, -1, -1],
        [-1, 1, -1],
        [1, -1, -1],
        [5, -1, -1],
        [-1, 2, -1],
        [1, -1, -1],
        [4, -1, -1],
        [-1, 3, -1],
        [1, -1, -1],
        [6, -1, -1],
        [-1, 4, -1],
        [1, -1, -1],
    ]
    assert np.array_equal(expected, result.numpy())
def transform_cls(self, qencoder, cencoder, aencoder, parser):
    """Assemble the training-time transform pipeline as a ``Sequence``.

    The steps are registered in this order: reference extraction, word-level
    and character-level query encoding, ground-truth conversion to an action
    sequence, previous actions, previous action rules, tree representation,
    decoder queries, and finally the encoded ground-truth actions. Every step
    that accepts a ``train`` constant receives ``True``.

    Args:
        qencoder: Encoder handed to ``EncodeWordQuery``.
        cencoder: Encoder handed to ``EncodeCharacterQuery``.
        aencoder: Action-sequence encoder shared by the action-related steps.
        parser: Parser handed to ``GroundTruthToActionSequence``.

    Returns:
        A ``Sequence`` wrapping the ordered steps.
    """
    steps = OrderedDict()

    # Query side: derive the reference from the text query, then encode it.
    steps["extract_reference"] = Apply(
        module=mlprogram.nn.Function(tokenize),
        in_keys=[["text_query", "str"]],
        out_key="reference",
    )
    steps["encode_word_query"] = Apply(
        module=EncodeWordQuery(qencoder),
        in_keys=["reference"],
        out_key="word_nl_query",
    )
    steps["encode_char"] = Apply(
        module=EncodeCharacterQuery(cencoder, 10),
        in_keys=["reference"],
        out_key="char_nl_query",
    )

    # Target side: everything below consumes the action sequence.
    steps["f2"] = Apply(
        module=GroundTruthToActionSequence(parser),
        in_keys=["ground_truth"],
        out_key="action_sequence",
    )
    steps["add_previous_action"] = Apply(
        module=AddPreviousActions(aencoder),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key="previous_actions",
    )
    steps["add_previous_action_rule"] = Apply(
        module=AddPreviousActionRules(aencoder, 4),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key="previous_action_rules",
    )
    steps["add_tree"] = Apply(
        module=AddActionSequenceAsTree(aencoder),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key=["adjacency_matrix", "depthes"],
    )
    steps["add_query"] = Apply(
        module=AddQueryForTreeGenDecoder(aencoder, 4),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key="action_queries",
    )
    steps["f4"] = Apply(
        module=EncodeActionSequence(aencoder),
        in_keys=["action_sequence", "reference"],
        out_key="ground_truth_actions",
    )

    return Sequence(steps)
def test_impossible_case(self):
    """Expect ``RuntimeError`` from ``AddPreviousActions`` with ``train=True``.

    The token list of the collected samples is overwritten with only ``"y"``
    and ``"1"`` before the encoder is built. Presumably this leaves part of
    the ground truth ``"y = x + 1"`` unencodable (to be confirmed against the
    encoder implementation); the test only asserts that applying the
    transform raises ``RuntimeError``.
    """
    ground_truth = "y = x + 1"
    env = Environment({"ground_truth": ground_truth}, {"ground_truth"})
    samples = get_samples(ListDataset([env]), MockParser())
    # Restrict the known tokens before the encoder sees the samples.
    samples.tokens = [("", "y"), ("", "1")]

    encoder = ActionSequenceEncoder(samples, 0)
    add_previous = AddPreviousActions(encoder)
    actions = GroundTruthToActionSequence(MockParser())(ground_truth)

    with pytest.raises(RuntimeError):
        add_previous(
            reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")],
            action_sequence=actions,
            train=True,
        )
def prepare_synthesizer(self, model, qencoder, aencoder):
    """Build a ``BeamSearch`` synthesizer around an ``ActionSequenceSampler``.

    Two ``Compose`` pipelines are created: one that prepares the query input
    and one that prepares the action-sequence state (with ``train`` fixed to
    ``False``). Both are handed to the sampler together with a ``Collate``
    configuration and the model.

    Args:
        model: Model passed to ``ActionSequenceSampler``.
        qencoder: Encoder handed to ``EncodeWordQuery``.
        aencoder: Action-sequence encoder used by the action steps and sampler.

    Returns:
        ``BeamSearch`` constructed with positional arguments ``5`` and ``20``
        and the sampler.
    """
    # Query-side preprocessing.
    input_steps = OrderedDict()
    input_steps["extract_reference"] = Apply(
        module=mlprogram.nn.Function(tokenize),
        in_keys=[["text_query", "str"]],
        out_key="reference",
    )
    input_steps["encode_query"] = Apply(
        module=EncodeWordQuery(qencoder),
        in_keys=["reference"],
        out_key="word_nl_query",
    )
    transform_input = Compose(input_steps)

    # Action-sequence preprocessing, applied with train=False.
    action_steps = OrderedDict()
    action_steps["add_previous_action"] = Apply(
        module=AddPreviousActions(aencoder, n_dependent=1),
        in_keys=["action_sequence", "reference"],
        constants={"train": False},
        out_key="previous_actions",
    )
    action_steps["add_action"] = Apply(
        module=AddActions(aencoder, n_dependent=1),
        in_keys=["action_sequence", "reference"],
        constants={"train": False},
        out_key="actions",
    )
    action_steps["add_state"] = AddState("state")
    action_steps["add_hidden_state"] = AddState("hidden_state")
    action_steps["add_history"] = AddState("history")
    transform_action_sequence = Compose(action_steps)

    collate = Collate(
        word_nl_query=CollateOptions(True, 0, -1),
        nl_query_features=CollateOptions(True, 0, -1),
        reference_features=CollateOptions(True, 0, -1),
        actions=CollateOptions(True, 0, -1),
        previous_actions=CollateOptions(True, 0, -1),
        previous_action_rules=CollateOptions(True, 0, -1),
        history=CollateOptions(False, 1, 0),
        hidden_state=CollateOptions(False, 0, 0),
        state=CollateOptions(False, 0, 0),
        ground_truth_actions=CollateOptions(True, 0, -1),
    )

    sampler = ActionSequenceSampler(
        aencoder,
        is_subtype,
        transform_input,
        transform_action_sequence,
        collate,
        model,
    )
    return BeamSearch(5, 20, sampler)
def transform_cls(self, qencoder, aencoder, parser):
    """Assemble the training-time transform pipeline as a ``Sequence``.

    The steps are registered in this order: reference extraction, word-level
    query encoding, ground-truth conversion to an action sequence, previous
    actions, actions, three ``AddState`` steps (``state``, ``hidden_state``,
    ``history``), and the encoded ground-truth actions. Steps that accept a
    ``train`` constant receive ``True``.

    Args:
        qencoder: Encoder handed to ``EncodeWordQuery``.
        aencoder: Action-sequence encoder shared by the action-related steps.
        parser: Parser handed to ``GroundTruthToActionSequence``.

    Returns:
        A ``Sequence`` wrapping the ordered steps.
    """
    steps = OrderedDict()

    steps["extract_reference"] = Apply(
        module=mlprogram.nn.Function(tokenize),
        in_keys=[["text_query", "str"]],
        out_key="reference",
    )
    steps["encode_word_query"] = Apply(
        module=EncodeWordQuery(qencoder),
        in_keys=["reference"],
        out_key="word_nl_query",
    )
    steps["f2"] = Apply(
        module=GroundTruthToActionSequence(parser),
        in_keys=["ground_truth"],
        out_key="action_sequence",
    )
    steps["add_previous_action"] = Apply(
        module=AddPreviousActions(aencoder, n_dependent=1),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key="previous_actions",
    )
    steps["add_action"] = Apply(
        module=AddActions(aencoder, n_dependent=1),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key="actions",
    )

    # Register the state entries before the target encoding step.
    steps["add_state"] = AddState("state")
    steps["add_hidden_state"] = AddState("hidden_state")
    steps["add_history"] = AddState("history")

    steps["f4"] = Apply(
        module=EncodeActionSequence(aencoder),
        in_keys=["action_sequence", "reference"],
        out_key="ground_truth_actions",
    )

    return Sequence(steps)
def transform(self, encoder, interpreter, parser):
    """Assemble the training-time transform pipeline as a ``Sequence``.

    The pipeline converts test cases and variables to tensors, turns the
    ground truth into an action sequence, adds previous actions (with
    ``train=True``), registers the ``state`` and ``hidden_state`` entries, and
    encodes the ground-truth actions.

    Args:
        encoder: Action-sequence encoder for the action-related steps.
        interpreter: Accepted for interface compatibility; not used here.
        parser: Parser handed to ``GroundTruthToActionSequence``.

    Returns:
        A ``Sequence`` wrapping the ordered steps.
    """
    # The action-related steps are created up front and slotted into the
    # pipeline after the input/variable tensor steps.
    to_action_sequence = Apply(
        module=GroundTruthToActionSequence(parser),
        in_keys=["ground_truth"],
        out_key="action_sequence",
    )
    add_previous = Apply(
        module=AddPreviousActions(encoder, n_dependent=1),
        in_keys=["action_sequence", "reference"],
        constants={"train": True},
        out_key="previous_actions",
    )
    encode_target = Apply(
        module=EncodeActionSequence(encoder),
        in_keys=["action_sequence", "reference"],
        out_key="ground_truth_actions",
    )

    steps = OrderedDict()
    steps["tinput"] = Apply(
        module=TransformInputs(),
        in_keys=["test_cases"],
        out_key="test_case_tensor",
    )
    steps["tvariable"] = Apply(
        module=TransformVariables(),
        in_keys=["variables", "test_case_tensor"],
        out_key="variables_tensor",
    )
    steps["tcode"] = to_action_sequence
    steps["aaction"] = add_previous
    steps["add_state"] = AddState("state")
    steps["add_hidden_state"] = AddState("hidden_state")
    steps["tgt"] = encode_target

    return Sequence(steps)
def prepare_synthesizer(self, model, qencoder, cencoder, aencoder):
    """Build a ``BeamSearch`` synthesizer around an ``ActionSequenceSampler``.

    Two ``Compose`` pipelines are created: one that prepares the word- and
    character-level query input, and one that prepares the action-sequence
    features (previous actions, previous action rules, tree representation,
    decoder queries) with ``train`` fixed to ``False``. Both are handed to the
    sampler together with a ``Collate`` configuration and the model.

    Args:
        model: Model passed to ``ActionSequenceSampler``.
        qencoder: Encoder handed to ``EncodeWordQuery``.
        cencoder: Encoder handed to ``EncodeCharacterQuery``.
        aencoder: Action-sequence encoder used by the action steps and sampler.

    Returns:
        ``BeamSearch`` constructed with positional arguments ``5`` and ``20``
        and the sampler.
    """
    # Query-side preprocessing.
    input_steps = OrderedDict()
    input_steps["extract_reference"] = Apply(
        module=mlprogram.nn.Function(tokenize),
        in_keys=[["text_query", "str"]],
        out_key="reference",
    )
    input_steps["encode_word_query"] = Apply(
        module=EncodeWordQuery(qencoder),
        in_keys=["reference"],
        out_key="word_nl_query",
    )
    input_steps["encode_char"] = Apply(
        module=EncodeCharacterQuery(cencoder, 10),
        in_keys=["reference"],
        out_key="char_nl_query",
    )
    transform_input = Compose(input_steps)

    # Action-sequence preprocessing, applied with train=False.
    action_steps = OrderedDict()
    action_steps["add_previous_action"] = Apply(
        module=AddPreviousActions(aencoder),
        in_keys=["action_sequence", "reference"],
        constants={"train": False},
        out_key="previous_actions",
    )
    action_steps["add_previous_action_rule"] = Apply(
        module=AddPreviousActionRules(aencoder, 4),
        in_keys=["action_sequence", "reference"],
        constants={"train": False},
        out_key="previous_action_rules",
    )
    action_steps["add_tree"] = Apply(
        module=AddActionSequenceAsTree(aencoder),
        in_keys=["action_sequence", "reference"],
        constants={"train": False},
        out_key=["adjacency_matrix", "depthes"],
    )
    action_steps["add_query"] = Apply(
        module=AddQueryForTreeGenDecoder(aencoder, 4),
        in_keys=["action_sequence", "reference"],
        constants={"train": False},
        out_key="action_queries",
    )
    transform_action_sequence = Compose(action_steps)

    collate = Collate(
        word_nl_query=CollateOptions(True, 0, -1),
        char_nl_query=CollateOptions(True, 0, -1),
        nl_query_features=CollateOptions(True, 0, -1),
        reference_features=CollateOptions(True, 0, -1),
        previous_actions=CollateOptions(True, 0, -1),
        previous_action_rules=CollateOptions(True, 0, -1),
        depthes=CollateOptions(False, 1, 0),
        adjacency_matrix=CollateOptions(False, 0, 0),
        action_queries=CollateOptions(True, 0, -1),
        ground_truth_actions=CollateOptions(True, 0, -1),
    )

    sampler = ActionSequenceSampler(
        aencoder,
        is_subtype,
        transform_input,
        transform_action_sequence,
        collate,
        model,
    )
    return BeamSearch(5, 20, sampler)
def prepare_synthesizer(self, model, encoder, interpreter, rollout=True):
    """Build the two-level synthesizer used by this test suite.

    An inner ``SMC`` over an ``ActionSequenceSampler`` is wrapped in a
    ``SequentialProgramSampler``. What is returned depends on ``rollout``:

    * ``rollout`` truthy: an outer ``SMC`` over a ``FilteredSampler`` that
      uses the IoU-based test-case metric with threshold ``1.0``.
    * ``rollout`` falsy: a ``FilteredSynthesizer`` (same metric and threshold)
      around a ``SynthesizerWithTimeout`` around an outer ``SMC`` over a
      ``SamplerWithValueNetwork``.

    Every random generator is a fresh ``np.random.RandomState(0)``.

    Args:
        model: Model providing ``encode_input``, ``encoder`` and ``value``.
        encoder: Action-sequence encoder for the sampler and action steps.
        interpreter: Interpreter handed to the sequential sampler and metric.
        rollout: Selects between the two configurations described above.

    Returns:
        The configured synthesizer object.
    """

    def make_input_transform():
        # A fresh Sequence is built on every call so each consumer gets its
        # own instance.
        stages = OrderedDict()
        stages["tinput"] = Apply(
            module=TransformInputs(),
            in_keys=["test_cases"],
            out_key="test_case_tensor",
        )
        stages["tvariable"] = Apply(
            module=TransformVariables(),
            in_keys=["variables", "test_case_tensor"],
            out_key="variables_tensor",
        )
        return Sequence(stages)

    def make_filter_metric():
        # IoU between "actual" and "expected", evaluated through
        # TestCaseResult on the environment's test cases.
        iou = metrics.use_environment(
            metric=metrics.Iou(),
            in_keys=["actual", "expected"],
            value_key="actual",
        )
        return metrics.use_environment(
            metric=metrics.TestCaseResult(interpreter, metric=iou),
            in_keys=["test_cases", "actual"],
            value_key="actual",
        )

    def make_outer_smc(inner_sampler):
        return SMC(
            3,
            1,
            inner_sampler,
            rng=np.random.RandomState(0),
            to_key=Pick("interpreter_state"),
            max_try_num=1,
        )

    collate = Collate(
        test_case_tensor=CollateOptions(False, 0, 0),
        input_feature=CollateOptions(False, 0, 0),
        test_case_feature=CollateOptions(False, 0, 0),
        reference_features=CollateOptions(True, 0, 0),
        variables_tensor=CollateOptions(True, 0, 0),
        previous_actions=CollateOptions(True, 0, -1),
        hidden_state=CollateOptions(False, 0, 0),
        state=CollateOptions(False, 0, 0),
        ground_truth_actions=CollateOptions(True, 0, -1),
    )

    # Inner level: sample action sequences for a single statement.
    subtype_check = IsSubtype()
    action_input_transform = make_input_transform()
    action_steps = OrderedDict()
    action_steps["add_previous_actions"] = Apply(
        module=AddPreviousActions(encoder, n_dependent=1),
        in_keys=["action_sequence", "reference"],
        out_key="previous_actions",
        constants={"train": False},
    )
    action_steps["add_state"] = AddState("state")
    action_steps["add_hidden_state"] = AddState("hidden_state")
    action_sampler = ActionSequenceSampler(
        encoder,
        subtype_check,
        action_input_transform,
        Compose(action_steps),
        collate,
        model,
        rng=np.random.RandomState(0),
    )
    action_sampler = mlprogram.samplers.transform(
        action_sampler,
        Parser().unparse,
    )
    subsynthesizer = SMC(
        5,
        1,
        action_sampler,
        max_try_num=1,
        to_key=Pick("action_sequence"),
        rng=np.random.RandomState(0),
    )

    # Outer level: sequential program sampler built on the inner synthesizer.
    sampler = SequentialProgramSampler(
        subsynthesizer,
        Apply(
            module=TransformInputs(),
            in_keys=["test_cases"],
            out_key="test_case_tensor",
        ),
        collate,
        model.encode_input,
        interpreter=interpreter,
        expander=Expander(),
        rng=np.random.RandomState(0),
    )

    if rollout:
        filtered_sampler = FilteredSampler(sampler, make_filter_metric(), 1.0)
        return make_outer_smc(filtered_sampler)

    # Without rollout, candidates go through the model's value network.
    value_input_transform = make_input_transform()
    value_network = torch.nn.Sequential(OrderedDict([
        ("encoder", model.encoder),
        ("value", model.value),
        ("pick", mlprogram.nn.Function(Pick("value"))),
    ]))
    scored_sampler = SamplerWithValueNetwork(
        sampler,
        value_input_transform,
        collate,
        value_network,
    )
    synthesizer = SynthesizerWithTimeout(make_outer_smc(scored_sampler), 1)
    return FilteredSynthesizer(synthesizer, make_filter_metric(), 1.0)