# Example #1
    def test_primitive_encoding(self):
        """primitive_encoding tags an int as type 0 and a list as type 1.

        Values are shifted by the integer range (256) and padded with the
        NULL token (2 * 256 = 512) up to the metadata's maximum length.
        """
        # A single integer: type tag 0, value -10 shifted into [0, 512).
        encoding = primitive_encoding(-10, DatasetMetadata(0, set([]), 256, 2))
        self.assertEqual(0, encoding.t)
        # BUG FIX: wrap the expected values in np.array *before* comparing.
        # The original wrote np.array([..] == encoding.value_arr), which only
        # worked through numpy's reflected __eq__ and silently yields a single
        # False (not an element-wise array) if the lengths ever differ.
        self.assertTrue(
            np.all(np.array([-10 + 256, 512]) == encoding.value_arr))

        # A list input: type tag 1, each element shifted, padded with 512.
        encoding = primitive_encoding([1, 2],
                                      DatasetMetadata(0, set([]), 256, 3))
        self.assertEqual(1, encoding.t)
        self.assertTrue(np.all(
            np.array([257, 258, 512]) == encoding.value_arr))
    def test_generate_dataset_can_relax_equivalence_checking(self):
        """With a relaxed equivalence spec both HEAD and LAST survive."""
        language, _ = generate_io_samples.get_language(50)
        head = [f for f in language if f.src == "HEAD"][0]
        last = [f for f in language if f.src == "LAST"][0]

        # Generate the program with the length of 1
        with tempfile.NamedTemporaryFile() as tmp:
            path = tmp.name
            np.random.seed(0)
            generate_dataset([head, last], DatasetSpec(50, 20, 5, 1, 1),
                             EquivalenceCheckingSpec(0, 1, None), path)
            # Check the dataset
            with open(path, "rb") as fp:
                result = pickle.load(fp)
            sources = set()
            for entry, in result.dataset:
                sources.add(entry.source_code)
                # Every stored source program must still compile.
                program = generate_io_samples.compile(entry.source_code, 50, 5)
                self.assertNotEqual(None, program)
                # Stored examples must agree with the compiled program.
                for example in entry.examples:
                    self.assertEqual(program.fun(example.inputs),
                                     example.output)
            self.assertEqual(
                set(["a <- [int]\nb <- HEAD a", "a <- [int]\nb <- LAST a"]),
                sources)
            self.assertEqual(DatasetMetadata(1, set(["HEAD", "LAST"]), 50, 20),
                             result.metadata)
# Example #3
    def test_EncodedDataset_constructor(self):
        """EncodedDataset turns each entry into (types, values, attribute)."""
        raw = ch.datasets.TupleDataset([
            Entry("entry1", [Example(([10, 20, 30], ), 10)],
                  {"HEAD": True, "SORT": False}),
            Entry("entry2", [Example(([30, 20, 10], ), [10, 20, 30])],
                  {"HEAD": False, "SORT": True}),
        ])

        encoded = EncodedDataset(
            Dataset(raw, DatasetMetadata(1, set(["HEAD", "SORT"]), 256,
                                         5)))
        entries = list(encoded)
        types0, values0, attribute0 = entries[0]
        types1, values1, attribute1 = entries[1]

        # Entry 1: list input / int output; attributes HEAD=1, SORT=0.
        self.assertTrue(np.all([[[0, 1], [1, 0]]] == types0))
        self.assertTrue(
            np.all([[[266, 276, 286, 512, 512], [266, 512, 512, 512, 512]]] ==
                   values0))
        self.assertTrue(np.all(np.array([1, 0]) == attribute0))

        # Entry 2: list input / list output; attributes HEAD=0, SORT=1.
        self.assertTrue(np.all([[[0, 1], [0, 1]]] == types1))
        self.assertTrue(
            np.all([[[286, 276, 266, 512, 512], [266, 276, 286, 512, 512]]] ==
                   values1))
        self.assertTrue(np.all(np.array([0, 1]) == attribute1))
# Example #4
    def test_example_embed_embed_minibatch_with_different_number_of_inputs(
            self):
        """Examples with fewer inputs are padded with the NULL embedding."""
        embed = ExampleEmbed(2, 2, 1, (np.arange(5) + 1).reshape((5, 1)))
        """
        EmbedId
          0 (-2)   -> 1 
          1 (-1)   -> 2
          2 ( 0)   -> 3
          3 ( 1)   -> 4
          4 (NULL) -> 5
        """

        metadata = DatasetMetadata(2, set([]), 2, 2)
        e0 = examples_encoding(
            [Example([[0, 1]], 0), Example([[1]], 1)], metadata)
        e1 = examples_encoding(
            [Example([1, [0, 1]], [0]),
             Example([0, [0, 1]], [])], metadata)

        state_embeddings = embed.forward(np.array([e0.types, e1.types]),
                                         np.array([e0.values, e1.values]))
        self.assertEqual((2, 2, 3, 2 + 2 * 1), state_embeddings.shape)

        # Table of (batch, example, slot) -> expected embedding row.
        cases = [
            ((0, 0, 0), [0, 1, 3, 4]),  # Input of e00
            ((0, 0, 1), [0, 0, 5, 5]),  # Input of e00
            ((0, 0, 2), [1, 0, 3, 5]),  # Output of e00
            ((0, 1, 0), [0, 1, 4, 5]),  # Input of e01
            ((0, 1, 1), [0, 0, 5, 5]),  # Input of e01
            ((0, 1, 2), [1, 0, 4, 5]),  # Output of e01
            ((1, 0, 0), [1, 0, 4, 5]),  # Input of e10
            ((1, 0, 1), [0, 1, 3, 4]),  # Input of e10
            ((1, 0, 2), [0, 1, 3, 5]),  # Output of e10
            ((1, 1, 0), [1, 0, 3, 5]),  # Input of e11
            ((1, 1, 1), [0, 1, 3, 4]),  # Input of e11
            ((1, 1, 2), [0, 1, 5, 5]),  # Output of e11
        ]
        for (batch, example, slot), row in cases:
            self.assertTrue(
                np.allclose(row, state_embeddings.array[batch, example,
                                                        slot]))
# Example #5
    def test_TrainingClassifier(self):
        """The classifier's loss supports a full backward pass."""
        network = ch.Sequential(ExampleEmbed(1, 2, 2), Encoder(10),
                                Decoder(2))
        classifier = TrainingClassifier(network)

        metadata = DatasetMetadata(1, set([]), 2, 2)
        encoding = examples_encoding(
            [Example([[0, 1]], 0), Example([[1]], 1)], metadata)
        labels = np.array([[1, 1]])
        loss = classifier(np.array([encoding.types]),
                          np.array([encoding.values]), labels)
        loss.grad = np.ones(loss.shape, dtype=np.float32)

        # backward does not throw an error
        loss.backward()
# Example #6
    def test_predict_with_neural_network(self):
        """The prediction closure matches a direct forward pass."""
        examples = [
            Example([2, [10, 20, 30]], 30),
            Example([1, [-10, 30, 40]], 30),
        ]
        metadata = DatasetMetadata(2, set(["MAP", "HEAD"]), 256, 5)
        shape = ModelShapeParameters(metadata, 3, 2, 10)
        model = InferenceModel(shape)

        # Probabilities via the closure under test.
        predict = predict_with_neural_network(shape, model)
        prob = predict(examples)

        # Probabilities via a direct call to the underlying network.
        encoding = examples_encoding(examples, metadata)
        direct = model.model(np.array([encoding.types]),
                             np.array([encoding.values])).array[0]

        # Index 0 corresponds to HEAD and index 1 to MAP for this metadata.
        self.assertAlmostEqual(direct[0], prob["HEAD"])
        self.assertAlmostEqual(direct[1], prob["MAP"])
    def test_generate_dataset_separate_higher_order_function_and_lambda(self):
        """The 'MAP INC' source is split into separate MAP and INC symbols."""
        language, _ = generate_io_samples.get_language(50)
        head = [f for f in language if f.src == "HEAD"][0]
        map_inc = [f for f in language if f.src == "MAP INC"][0]

        # Generate the program with the length of 1
        with tempfile.NamedTemporaryFile() as tmp:
            path = tmp.name
            np.random.seed(0)
            generate_dataset([head, map_inc], DatasetSpec(50, 20, 5, 1, 1),
                             EquivalenceCheckingSpec(1, 1, None), path)
            # Check the dataset
            with open(path, "rb") as fp:
                d = pickle.load(fp)
            # Collect every attribute symbol appearing in any entry.
            symbols = {
                symbol
                for entry, in d.dataset for symbol in entry.attribute.keys()
            }
            self.assertEqual(set(["HEAD", "MAP", "INC"]), symbols)
            self.assertEqual(
                DatasetMetadata(1, set(["HEAD", "MAP", "INC"]), 50, 20),
                d.metadata)
# Example #8
    def test_Encoder(self):
        """With all-one weights and zero bias, the encoder's output equals a
        triple sigmoid of the summed state embedding."""
        embed = ExampleEmbed(1, 2, 1, (np.arange(5) + 1).reshape((5, 1)))

        encoder = Encoder(1,
                          initialW=ch.initializers.One(),
                          initial_bias=ch.initializers.Zero())
        # The encoder exposes six trainable parameters in total.
        self.assertEqual(6, len(list(encoder.params())))
        """
        state_embeddings: (N, e, 2, 4) -> h1: (N, e, 1) -> h2: (N, e, 2) -> output: (N, e, 2)
        """

        metadata = DatasetMetadata(1, set([]), 2, 2)
        encoding = examples_encoding(
            [Example([[0, 1]], 0), Example([[1]], 1)], metadata)

        state_embeddings = embed(np.array([encoding.types]),
                                 np.array([encoding.values]))
        layer_encodings = encoder(state_embeddings)

        self.assertEqual((1, 2, 1), layer_encodings.shape)
        for n in range(1):
            for e in range(2):
                # sigmoid applied three times to the summed embedding.
                expected = np.array(state_embeddings[n, e, :, :].array.sum())
                expected = F.sigmoid(F.sigmoid(F.sigmoid(expected)))
                self.assertEqual(expected.array, layer_encodings.array[n, e])
# Example #9
    def test_example_embed_embed_one_sample(self):
        """One sample embeds into one type/value row per input and output."""
        embed = ExampleEmbed(1, 2, 1, (np.arange(5) + 1).reshape((5, 1)))
        self.assertEqual(1, len(list(embed.params())))
        """
        EmbedId
          0 (-2)   -> 1 
          1 (-1)   -> 2
          2 ( 0)   -> 3
          3 ( 1)   -> 4
          4 (NULL) -> 5
        """

        e = examples_encoding(
            [Example([[0, 1]], 0), Example([[1]], 1)],
            DatasetMetadata(1, set([]), 2, 2))

        state_embeddings = embed.forward(np.array([e.types]),
                                         np.array([e.values]))
        self.assertEqual((1, 2, 2, 2 + 2 * 1), state_embeddings.shape)

        # Table of (example, slot) -> expected embedding row.
        cases = [
            ((0, 0), [0, 1, 3, 4]),  # Input of e1
            ((0, 1), [1, 0, 3, 5]),  # Output of e1
            ((1, 0), [0, 1, 4, 5]),  # Input of e2
            ((1, 1), [1, 0, 4, 5]),  # Output of e2
        ]
        for (example, slot), row in cases:
            self.assertTrue(
                np.allclose(row, state_embeddings.array[0, example, slot]))

        # backward does not throw an error
        state_embeddings.grad = np.ones(state_embeddings.shape,
                                        dtype=np.float32)
        state_embeddings.backward()
    def test_generate_dataset(self):
        """End-to-end check of generate_dataset for program lengths 1 and 2.

        Generates a dataset over HEAD and TAKE, reloads the pickled result,
        recompiles every stored source program, and verifies each recorded
        example against the compiled program's actual output.
        """
        LINQ, _ = generate_io_samples.get_language(50)
        HEAD = [f for f in LINQ if f.src == "HEAD"][0]
        TAKE = [f for f in LINQ if f.src == "TAKE"][0]

        # Generate the program with the length of 1
        with tempfile.NamedTemporaryFile() as f:
            name = f.name
            generate_dataset([HEAD, TAKE], DatasetSpec(50, 20, 5, 1, 1),
                             EquivalenceCheckingSpec(1.0, 1, None), name)
            # Check the dataset
            srcs = set()
            with open(name, "rb") as fp:
                d = pickle.load(fp)
                dataset = d.dataset
                metadata = d.metadata
                for entry, in dataset:
                    srcs.add(entry.source_code)
                    # Every stored source program must still compile.
                    p = generate_io_samples.compile(entry.source_code, 50, 5)
                    self.assertNotEqual(None, p)
                    for example in entry.examples:
                        # Stored examples must match the program's output.
                        output = p.fun(example.inputs)
                        self.assertEqual(output, example.output)
            self.assertEqual(
                set([
                    "a <- int\nb <- [int]\nc <- TAKE a b",
                    "a <- [int]\nb <- HEAD a"
                ]), srcs)
            self.assertEqual(DatasetMetadata(2, set(["TAKE", "HEAD"]), 50, 20),
                             metadata)

        # Generate the program with the length of 2
        with tempfile.NamedTemporaryFile() as f:
            name = f.name

            # simplify removes redundant variables before programs are
            # stored (presumably so equivalent programs collapse — confirm).
            def simplify(program):
                program = remove_redundant_variables(program)
                return program

            generate_dataset([HEAD, TAKE],
                             DatasetSpec(50, 20, 5, 2, 2),
                             EquivalenceCheckingSpec(1.0, 1, None),
                             name,
                             simplify=simplify)

            # Check the dataset
            srcs = set()
            with open(name, "rb") as fp:
                d = pickle.load(fp)
                dataset = d.dataset
                metadata = d.metadata
                for entry, in dataset:
                    srcs.add(entry.source_code)
                    # Every stored source program must still compile.
                    p = generate_io_samples.compile(entry.source_code, 50, 5)
                    self.assertNotEqual(None, p)
                    for example in entry.examples:
                        # Stored examples must match the program's output.
                        output = p.fun(example.inputs)
                        self.assertEqual(output, example.output)
            self.assertEqual(
                set([
                    "a <- [int]\nb <- HEAD a\nc <- TAKE b a",
                    "a <- int\nb <- [int]\nc <- TAKE a b\nd <- TAKE a c",
                    "a <- int\nb <- [int]\nc <- int\nd <- TAKE a b\ne <- TAKE c d",
                    "a <- int\nb <- [int]\nc <- TAKE a b\nd <- HEAD c",
                    "a <- [int]\nb <- [int]\nc <- HEAD a\nd <- TAKE c b"
                ]), srcs)
            self.assertEqual(DatasetMetadata(3, set(["TAKE", "HEAD"]), 50, 20),
                             metadata)
# Example #11
 def test_examples_encoding_if_num_inputs_is_too_large(self):
     metadata = DatasetMetadata(0, set([]), 2, 2)
     self.assertRaises(
         RuntimeError, lambda: examples_encoding(
             [Example([1, [0, 1]], [0]),
              Example([0, [0, 1]], [])], metadata))