Esempio n. 1
0
    def testWassersteinNetwork(self):
        """Check the bookkeeping and outputs of a WassersteinNetwork.

        A network is built on a holder whose shufflers are grouped by
        ("compound", "batch"). The test then verifies:
          * the unique targets and unique nuisances found by the network,
          * the keys listed for each target,
          * that every row fed to the network inputs is one of the rows held
            by the corresponding shuffler,
          * the shape of the first feature output, and
          * the keys of `wass_loss_target`.
        """
        batch_n = 5
        holder = forgetting_nuisance.DatasetHolder(self.dummy_df)
        holder.add_shufflers(["compound", "batch"])
        input_dim = holder.input_dim
        feature_dim = input_dim
        network = forgetting_nuisance.WassersteinNetwork(
            holder, feature_dim, batch_n, 0, 1)

        self.assertEqual(set(network._unique_targets),
                         {("drug", ), ("control", )})
        self.assertEqual(set(network._unique_nuisances),
                         {("batch1", ), ("batch2", )})
        keys_for_targets = {
            ("drug", ): [(("drug", "batch1"), None),
                         (("drug", "batch2"), None)],
            ("control", ): [(("control", "batch1"), None),
                            (("control", "batch2"), None)]
        }
        self.assertSameElements(list(network._keys_for_targets.keys()),
                                list(keys_for_targets.keys()))
        for key in keys_for_targets:
            self.assertSameElements(keys_for_targets[key],
                                    network._keys_for_targets[key])

        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            feed_dict = {}
            for key, shuffler in holder.data_shufflers.items():
                input_mini, _ = shuffler.next_batch(batch_n)
                feed_dict[network._x_vals[key]] = input_mini
            f_val, x_vals = sess.run([network._features, network._x_vals],
                                     feed_dict=feed_dict)

            ## make sure each input used in every batch came from the actual inputs
            for key, vals in x_vals.items():
                for row in vals:  # iterate over every element
                    ## identify distance from closest element in inputs
                    differences = [
                        la.norm(np.array(row) - np.array(candidate))
                        for candidate in holder.data_shufflers[key].inputs
                    ]
                    ## The assertion has to live inside the row loop: placed
                    ## after it, only the last row of each batch is checked.
                    self.assertAlmostEqual(min(differences), 0.0, places=5)
        self.assertEqual(list(f_val.values())[0].shape, (batch_n, input_dim))

        self.assertSameElements(list(network.wass_loss_target.keys()),
                                [("drug", ), ("control", )])
    def testMakeHolder2(self):
        """Check shufflers of a holder built with "one_hot" batch input info.

        The holder is created with input_category_level=["batch"] and
        batch_input_info="one_hot". Three groups of shufflers are then added
        in turn: per batch, per compound, and per (compound, batch) restricted
        to the two control groups. For one shuffler of each group the dense
        input/output shapes are checked; for the first two groups the expected
        columns of ones and zeros are checked as well.
        """
        num = self.num
        ones_col = np.ones((num))
        zeros_col = np.zeros((num))
        ## row ranges covering the first `num` rows and the remaining rows.
        head = slice(None, num)
        tail = slice(num, None)

        def densify(shuffler):
            ## dense versions of a shuffler's inputs and outputs, in that order.
            dense_in = forgetting_nuisance.get_dense_arr(shuffler.inputs)
            dense_out = forgetting_nuisance.get_dense_arr(shuffler.outputs)
            return dense_in, dense_out

        def check_columns(expectations):
            ## each entry is (array, row slice, column index, expected column).
            for arr, rows, col, expected in expectations:
                np.testing.assert_array_equal(arr[rows, col], expected)

        ## The dataframe is formatted by "one_hot", and batch information is
        ## provided in the inputs.
        holder = forgetting_nuisance.DatasetHolder(
            self.dummy_df,
            input_category_level=["batch"],
            batch_input_info="one_hot")

        ## Shufflers for each batch, with compound labels as outputs; inspect
        ## the one going from batch1 to compounds.
        holder.add_shufflers(["batch"], ["compound"], None)
        batch1_in, batch1_out = densify(
            holder.data_shufflers[("batch1", "compound")])
        ## 2 * self.num rows are expected, with 4 input columns and 2 output
        ## columns.
        self.assertEqual(batch1_in.shape, (2 * num, 4))
        self.assertEqual(batch1_out.shape, (2 * num, 2))
        ## Input columns 2 and 3 are presumably the one-hot batch indicator
        ## (constant within this shuffler); the output columns flip between
        ## the first and second half of the rows.
        check_columns([
            (batch1_in, head, 2, ones_col),
            (batch1_in, head, 3, zeros_col),
            (batch1_out, head, 0, ones_col),
            (batch1_out, head, 1, zeros_col),
            (batch1_in, tail, 2, ones_col),
            (batch1_in, tail, 3, zeros_col),
            (batch1_out, tail, 0, zeros_col),
            (batch1_out, tail, 1, ones_col),
        ])

        ## Shufflers for each compound, with batch labels as outputs; inspect
        ## the one for the control compound.
        holder.add_shufflers(["compound"], ["batch"], None)
        control_in, control_out = densify(
            holder.data_shufflers[("control", "batch")])
        self.assertEqual(control_in.shape, (2 * num, 4))
        self.assertEqual(control_out.shape, (2 * num, 2))
        ## Here input columns 2 and 3 flip between the two halves, together
        ## with the output columns.
        check_columns([
            (control_in, head, 2, ones_col),
            (control_in, head, 3, zeros_col),
            (control_out, head, 0, ones_col),
            (control_out, head, 1, zeros_col),
            (control_in, tail, 2, zeros_col),
            (control_in, tail, 3, ones_col),
            (control_out, tail, 0, zeros_col),
            (control_out, tail, 1, ones_col),
        ])

        ## Shufflers from groups specified by compound and batch to output
        ## labels specified by batch, restricted to the control groups.
        allowed_groups = [("control", "batch1"), ("control", "batch2")]
        holder.add_shufflers(["compound", "batch"], ["batch"], allowed_groups)
        fixed_in, fixed_out = densify(
            holder.data_shufflers[("control", "batch1"), "batch"])
        ## Each such set is expected to hold self.num rows.
        self.assertEqual(fixed_in.shape, (num, 4))
        ## Only one output label is possible for this set since both batch
        ## and compound have been fixed.
        self.assertEqual(fixed_out.shape, (num, 1))