def test_primitive_encoding(self):
    """Integers and lists are encoded with a value offset and NULL padding."""
    # Integer -10 with value_range=256 is shifted to -10 + 256 = 246;
    # the remaining slot is padded with NULL (= 2 * 256 = 512).
    encoding = primitive_encoding(-10, DatasetMetadata(0, set([]), 256, 2))
    self.assertEqual(0, encoding.t)  # type tag 0: integer
    # np.array_equal replaces the confusing np.all(np.array([...] == arr))
    # pattern, which wrapped the comparison inside np.array(...).
    self.assertTrue(np.array_equal([-10 + 256, 512], encoding.value_arr))
    # List [1, 2]: each element shifted by 256, NULL-padded to length 3.
    encoding = primitive_encoding([1, 2], DatasetMetadata(0, set([]), 256, 3))
    self.assertEqual(1, encoding.t)  # type tag 1: list
    self.assertTrue(np.array_equal([257, 258, 512], encoding.value_arr))
def test_generate_dataset_can_relax_equivalence_checking(self):
    """With equivalence-checking ratio 0 both HEAD and LAST programs survive."""
    LINQ, _ = generate_io_samples.get_language(50)
    HEAD = [f for f in LINQ if f.src == "HEAD"][0]
    LAST = [f for f in LINQ if f.src == "LAST"][0]
    # NamedTemporaryFile deletes its file on close, so reusing the path
    # afterwards leaks the regenerated file (and breaks on Windows).
    # A TemporaryDirectory scopes the dataset file and cleans it up.
    with tempfile.TemporaryDirectory() as tmpdir:
        name = tmpdir + "/dataset.pickle"
        np.random.seed(0)
        # Generate the programs with the length of 1
        generate_dataset([HEAD, LAST], DatasetSpec(50, 20, 5, 1, 1),
                         EquivalenceCheckingSpec(0, 1, None), name)
        # Check the dataset: every entry must compile and its examples
        # must reproduce their recorded outputs.
        srcs = set()
        with open(name, "rb") as fp:
            d = pickle.load(fp)
            dataset = d.dataset
            metadata = d.metadata
            for entry, in dataset:
                srcs.add(entry.source_code)
                p = generate_io_samples.compile(entry.source_code, 50, 5)
                self.assertNotEqual(None, p)
                for example in entry.examples:
                    output = p.fun(example.inputs)
                    self.assertEqual(output, example.output)
    self.assertEqual(
        set(["a <- [int]\nb <- HEAD a", "a <- [int]\nb <- LAST a"]),
        srcs)
    self.assertEqual(DatasetMetadata(1, set(["HEAD", "LAST"]), 50, 20),
                     metadata)
def test_EncodedDataset_constructor(self):
    """EncodedDataset yields one (types, values, attribute) triple per entry."""
    raw = ch.datasets.TupleDataset([
        Entry("entry1",
              [Example(([10, 20, 30], ), 10)],
              dict([["HEAD", True], ["SORT", False]])),
        Entry("entry2",
              [Example(([30, 20, 10], ), [10, 20, 30])],
              dict([["HEAD", False], ["SORT", True]])),
    ])
    encoded = EncodedDataset(
        Dataset(raw, DatasetMetadata(1, set(["HEAD", "SORT"]), 256, 5)))
    [(types0, values0, attribute0),
     (types1, values1, attribute1)] = list(encoded)
    # entry1: list input, integer output
    self.assertTrue(np.all([[[0, 1], [1, 0]]] == types0))
    self.assertTrue(np.all(
        [[[266, 276, 286, 512, 512],
          [266, 512, 512, 512, 512]]] == values0))
    self.assertTrue(np.all(np.array([1, 0]) == attribute0))
    # entry2: list input, list output
    self.assertTrue(np.all([[[0, 1], [0, 1]]] == types1))
    self.assertTrue(np.all(
        [[[286, 276, 266, 512, 512],
          [266, 276, 286, 512, 512]]] == values1))
    self.assertTrue(np.all(np.array([0, 1]) == attribute1))
def test_example_embed_embed_minibatch_with_different_number_of_inputs(
        self):
    # Integer embedding table (EmbedId):
    #   0 (-2) -> 1, 1 (-1) -> 2, 2 (0) -> 3, 3 (1) -> 4, 4 (NULL) -> 5
    embed = ExampleEmbed(2, 2, 1, (np.arange(5) + 1).reshape((5, 1)))
    metadata = DatasetMetadata(2, set([]), 2, 2)
    enc0 = examples_encoding(
        [Example([[0, 1]], 0), Example([[1]], 1)], metadata)
    enc1 = examples_encoding(
        [Example([1, [0, 1]], [0]), Example([0, [0, 1]], [])], metadata)
    state_embeddings = embed.forward(
        np.array([enc0.types, enc1.types]),
        np.array([enc0.values, enc1.values]))
    # (minibatch, examples, inputs + output, type one-hot + embedded values)
    self.assertEqual((2, 2, 3, 2 + 2 * 1), state_embeddings.shape)
    # Example (0, 0)
    self.assertTrue(
        np.allclose([0, 1, 3, 4], state_embeddings.array[0, 0, 0]))  # input
    self.assertTrue(
        np.allclose([0, 0, 5, 5], state_embeddings.array[0, 0, 1]))  # input
    self.assertTrue(
        np.allclose([1, 0, 3, 5], state_embeddings.array[0, 0, 2]))  # output
    # Example (0, 1)
    self.assertTrue(
        np.allclose([0, 1, 4, 5], state_embeddings.array[0, 1, 0]))  # input
    self.assertTrue(
        np.allclose([0, 0, 5, 5], state_embeddings.array[0, 1, 1]))  # input
    self.assertTrue(
        np.allclose([1, 0, 4, 5], state_embeddings.array[0, 1, 2]))  # output
    # Example (1, 0)
    self.assertTrue(
        np.allclose([1, 0, 4, 5], state_embeddings.array[1, 0, 0]))  # input
    self.assertTrue(
        np.allclose([0, 1, 3, 4], state_embeddings.array[1, 0, 1]))  # input
    self.assertTrue(
        np.allclose([0, 1, 3, 5], state_embeddings.array[1, 0, 2]))  # output
    # Example (1, 1)
    self.assertTrue(
        np.allclose([1, 0, 3, 5], state_embeddings.array[1, 1, 0]))  # input
    self.assertTrue(
        np.allclose([0, 1, 3, 4], state_embeddings.array[1, 1, 1]))  # input
    self.assertTrue(
        np.allclose([0, 1, 5, 5], state_embeddings.array[1, 1, 2]))  # output
def test_TrainingClassifier(self):
    """The training classifier produces a loss that supports backprop."""
    model = ch.Sequential(ExampleEmbed(1, 2, 2), Encoder(10), Decoder(2))
    classifier = TrainingClassifier(model)
    metadata = DatasetMetadata(1, set([]), 2, 2)
    encoding = examples_encoding(
        [Example([[0, 1]], 0), Example([[1]], 1)], metadata)
    labels = np.array([[1, 1]])
    loss = classifier(
        np.array([encoding.types]), np.array([encoding.values]), labels)
    loss.grad = np.ones(loss.shape, dtype=np.float32)
    # backward should not raise
    loss.backward()
def test_predict_with_neural_network(self):
    """The predictor agrees with a direct forward pass of the model."""
    examples = [
        Example([2, [10, 20, 30]], 30),
        Example([1, [-10, 30, 40]], 30),
    ]
    metadata = DatasetMetadata(2, set(["MAP", "HEAD"]), 256, 5)
    shape = ModelShapeParameters(metadata, 3, 2, 10)
    model = InferenceModel(shape)
    predict = predict_with_neural_network(shape, model)
    prob = predict(examples)
    # Run the underlying network directly on the same encoding.
    encoding = examples_encoding(examples, metadata)
    expected = model.model(
        np.array([encoding.types]), np.array([encoding.values])).array[0]
    # Index 0 corresponds to HEAD and index 1 to MAP
    # (presumably sorted symbol order — confirm against the predictor).
    self.assertAlmostEqual(expected[0], prob["HEAD"])
    self.assertAlmostEqual(expected[1], prob["MAP"])
def test_generate_dataset_separate_higher_order_function_and_lambda(self):
    """A `MAP INC` program yields separate MAP and INC attribute keys."""
    LINQ, _ = generate_io_samples.get_language(50)
    HEAD = [f for f in LINQ if f.src == "HEAD"][0]
    MAP_INC = [f for f in LINQ if f.src == "MAP INC"][0]
    # NamedTemporaryFile deletes its file on close, so reusing the path
    # afterwards leaks the regenerated file (and breaks on Windows).
    # A TemporaryDirectory scopes the dataset file and cleans it up.
    with tempfile.TemporaryDirectory() as tmpdir:
        name = tmpdir + "/dataset.pickle"
        np.random.seed(0)
        # Generate the programs with the length of 1
        generate_dataset([HEAD, MAP_INC], DatasetSpec(50, 20, 5, 1, 1),
                         EquivalenceCheckingSpec(1, 1, None), name)
        # Collect every attribute key occurring in the dataset
        attribute_keys = set()
        with open(name, "rb") as fp:
            d = pickle.load(fp)
            dataset = d.dataset
            metadata = d.metadata
            for entry, in dataset:
                for symbol in entry.attribute.keys():
                    attribute_keys.add(symbol)
    self.assertEqual(set(["HEAD", "MAP", "INC"]), attribute_keys)
    self.assertEqual(
        DatasetMetadata(1, set(["HEAD", "MAP", "INC"]), 50, 20),
        metadata)
def test_Encoder(self):
    """With unit weights and zero biases the encoder is a triple sigmoid."""
    embed = ExampleEmbed(1, 2, 1, (np.arange(5) + 1).reshape((5, 1)))
    encoder = Encoder(1, initialW=ch.initializers.One(),
                      initial_bias=ch.initializers.Zero())
    # Presumably three dense layers, each with a weight and a bias — confirm.
    self.assertEqual(6, len(list(encoder.params())))
    metadata = DatasetMetadata(1, set([]), 2, 2)
    encoding = examples_encoding(
        [Example([[0, 1]], 0), Example([[1]], 1)], metadata)
    state_embeddings = embed(np.array([encoding.types]),
                             np.array([encoding.values]))
    layer_encodings = encoder(state_embeddings)
    self.assertEqual((1, 2, 1), layer_encodings.shape)
    # Each output unit equals sigmoid applied three times to the summed
    # state embedding, because every weight is 1 and every bias is 0.
    for i in range(1):
        for j in range(2):
            h = np.array(state_embeddings[i, j, :, :].array.sum())
            h = F.sigmoid(F.sigmoid(F.sigmoid(h)))
            self.assertEqual(h.array, layer_encodings.array[i, j])
def test_example_embed_embed_one_sample(self):
    # Integer embedding table (EmbedId):
    #   0 (-2) -> 1, 1 (-1) -> 2, 2 (0) -> 3, 3 (1) -> 4, 4 (NULL) -> 5
    embed = ExampleEmbed(1, 2, 1, (np.arange(5) + 1).reshape((5, 1)))
    self.assertEqual(1, len(list(embed.params())))
    encoding = examples_encoding(
        [Example([[0, 1]], 0), Example([[1]], 1)],
        DatasetMetadata(1, set([]), 2, 2))
    state_embeddings = embed.forward(np.array([encoding.types]),
                                     np.array([encoding.values]))
    # (minibatch, examples, input + output, type one-hot + embedded values)
    self.assertEqual((1, 2, 2, 2 + 2 * 1), state_embeddings.shape)
    # Input of example 1
    self.assertTrue(
        np.allclose([0, 1, 3, 4], state_embeddings.array[0, 0, 0]))
    # Output of example 1
    self.assertTrue(
        np.allclose([1, 0, 3, 5], state_embeddings.array[0, 0, 1]))
    # Input of example 2
    self.assertTrue(
        np.allclose([0, 1, 4, 5], state_embeddings.array[0, 1, 0]))
    # Output of example 2
    self.assertTrue(
        np.allclose([1, 0, 4, 5], state_embeddings.array[0, 1, 1]))
    # backward should not raise
    state_embeddings.grad = np.ones(state_embeddings.shape,
                                    dtype=np.float32)
    state_embeddings.backward()
def test_generate_dataset(self):
    """generate_dataset deduplicates equivalent programs and records metadata."""
    LINQ, _ = generate_io_samples.get_language(50)
    HEAD = [f for f in LINQ if f.src == "HEAD"][0]
    TAKE = [f for f in LINQ if f.src == "TAKE"][0]

    def check_dataset(name):
        # Load the pickled dataset; verify every entry compiles and its
        # examples reproduce the recorded outputs. Returns (srcs, metadata).
        srcs = set()
        with open(name, "rb") as fp:
            d = pickle.load(fp)
            dataset = d.dataset
            metadata = d.metadata
            for entry, in dataset:
                srcs.add(entry.source_code)
                p = generate_io_samples.compile(entry.source_code, 50, 5)
                self.assertNotEqual(None, p)
                for example in entry.examples:
                    output = p.fun(example.inputs)
                    self.assertEqual(output, example.output)
        return srcs, metadata

    # Generate the programs with the length of 1.
    # NamedTemporaryFile deletes its file on close, so reusing the path
    # afterwards leaks the regenerated file (and breaks on Windows);
    # a TemporaryDirectory scopes the dataset file and cleans it up.
    with tempfile.TemporaryDirectory() as tmpdir:
        name = tmpdir + "/dataset.pickle"
        generate_dataset([HEAD, TAKE], DatasetSpec(50, 20, 5, 1, 1),
                         EquivalenceCheckingSpec(1.0, 1, None), name)
        srcs, metadata = check_dataset(name)
    self.assertEqual(
        set([
            "a <- int\nb <- [int]\nc <- TAKE a b",
            "a <- [int]\nb <- HEAD a"
        ]), srcs)
    self.assertEqual(DatasetMetadata(2, set(["TAKE", "HEAD"]), 50, 20),
                     metadata)

    # Generate the programs with the length of 2, simplified by removing
    # redundant variables (the trivial wrapper function is unnecessary).
    with tempfile.TemporaryDirectory() as tmpdir:
        name = tmpdir + "/dataset.pickle"
        generate_dataset([HEAD, TAKE], DatasetSpec(50, 20, 5, 2, 2),
                         EquivalenceCheckingSpec(1.0, 1, None), name,
                         simplify=remove_redundant_variables)
        srcs, metadata = check_dataset(name)
    self.assertEqual(
        set([
            "a <- [int]\nb <- HEAD a\nc <- TAKE b a",
            "a <- int\nb <- [int]\nc <- TAKE a b\nd <- TAKE a c",
            "a <- int\nb <- [int]\nc <- int\nd <- TAKE a b\ne <- TAKE c d",
            "a <- int\nb <- [int]\nc <- TAKE a b\nd <- HEAD c",
            "a <- [int]\nb <- [int]\nc <- HEAD a\nd <- TAKE c b"
        ]), srcs)
    self.assertEqual(DatasetMetadata(3, set(["TAKE", "HEAD"]), 50, 20),
                     metadata)
def test_examples_encoding_if_num_inputs_is_too_large(self):
    """Encoding examples with more inputs than metadata allows raises."""
    metadata = DatasetMetadata(0, set([]), 2, 2)
    with self.assertRaises(RuntimeError):
        examples_encoding(
            [Example([1, [0, 1]], [0]), Example([0, [0, 1]], [])],
            metadata)