def testWassersteinNetwork(self):
    """Build a WassersteinNetwork on the dummy data and sanity-check it.

    Checks the target / nuisance groupings stored on the network, then
    evaluates the feature and input tensors once and confirms that every
    row fed to the graph matches a row of the corresponding shuffler's
    inputs and that the feature output has shape (batch_n, input_dim).
    """
    batch_n = 5
    holder = forgetting_nuisance.DatasetHolder(self.dummy_df)
    holder.add_shufflers(["compound", "batch"])
    input_dim = holder.input_dim
    feature_dim = input_dim
    network = forgetting_nuisance.WassersteinNetwork(
        holder, feature_dim, batch_n, 0, 1)

    ## Targets are the compounds, nuisances are the batches.
    self.assertEqual(set(network._unique_targets),
                     {("drug",), ("control",)})
    self.assertEqual(set(network._unique_nuisances),
                     {("batch1",), ("batch2",)})

    ## Each target should map to one (group key, None) pair per batch.
    expected_keys = {
        (compound,): [((compound, batch), None)
                      for batch in ("batch1", "batch2")]
        for compound in ("drug", "control")
    }
    self.assertSameElements(list(network._keys_for_targets.keys()),
                            list(expected_keys.keys()))
    for target, expected in expected_keys.items():
        self.assertSameElements(expected, network._keys_for_targets[target])

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        ## Feed one mini-batch per shuffler into its matching placeholder.
        feed_dict = {}
        for key, shuffler in holder.data_shufflers.items():
            batch_inputs, _ = shuffler.next_batch(batch_n)
            feed_dict[network._x_vals[key]] = batch_inputs
        f_val, x_vals = sess.run([network._features, network._x_vals],
                                 feed_dict=feed_dict)

    ## Make sure each input used in every batch came from the actual inputs:
    ## the distance to the closest stored input row must be (almost) zero.
    for key, fed_rows in x_vals.items():
        stored_inputs = holder.data_shufflers[key].inputs
        for fed_row in fed_rows:
            fed_vec = np.array(fed_row)
            closest = min(
                la.norm(fed_vec - np.array(candidate))
                for candidate in stored_inputs)
            self.assertAlmostEqual(closest, 0.0, places=5)

    first_features = list(f_val.values())[0]
    self.assertEqual(first_features.shape, (batch_n, input_dim))
    self.assertSameElements(list(network.wass_loss_target.keys()),
                            [("drug",), ("control",)])
def testMakeHolder2(self):
    """Check shufflers of a DatasetHolder built with one-hot batch inputs.

    The holder is created with ``batch_input_info="one_hot"`` and batch as
    the input category level. Shufflers are then added per batch, per
    compound, and per (compound, batch) restricted to control groups, and
    the dense input / output arrays of one shuffler from each are checked.
    """
    holder = forgetting_nuisance.DatasetHolder(
        self.dummy_df,
        input_category_level=["batch"],
        batch_input_info="one_hot")
    num = self.num
    ones = np.ones(num)
    zeros = np.zeros(num)

    def dense_pair(shuffler_key):
        """Return dense (inputs, outputs) arrays of the keyed shuffler."""
        shuffler = holder.data_shufflers[shuffler_key]
        dense_inputs = forgetting_nuisance.get_dense_arr(shuffler.inputs)
        dense_outputs = forgetting_nuisance.get_dense_arr(shuffler.outputs)
        return dense_inputs, dense_outputs

    ## Shufflers for each batch, with compound labels as outputs; inspect
    ## the one going from batch1 to compounds.
    holder.add_shufflers(["batch"], ["compound"], None)
    inputs, outputs = dense_pair(("batch1", "compound"))
    ## Two groups of self.num rows each; 4 input columns and 2 label columns.
    ## NOTE(review): columns 2 and 3 look like the one-hot batch indicator
    ## appended to the features -- confirm against DatasetHolder.
    self.assertEqual(inputs.shape, (2 * num, 4))
    self.assertEqual(outputs.shape, (2 * num, 2))
    ## All rows carry the same batch indicator; the output label flips
    ## between the first and second half.
    for actual, expected in (
            (inputs[:num, 2], ones),
            (inputs[:num, 3], zeros),
            (outputs[:num, 0], ones),
            (outputs[:num, 1], zeros),
            (inputs[num:, 2], ones),
            (inputs[num:, 3], zeros),
            (outputs[num:, 0], zeros),
            (outputs[num:, 1], ones),
    ):
        np.testing.assert_array_equal(actual, expected)

    ## Shufflers for each compound, with batch labels as outputs; inspect
    ## the one going from control to batches.
    holder.add_shufflers(["compound"], ["batch"], None)
    inputs, outputs = dense_pair(("control", "batch"))
    self.assertEqual(inputs.shape, (2 * num, 4))
    self.assertEqual(outputs.shape, (2 * num, 2))
    ## Here both the batch indicator columns and the output label flip
    ## between the first and second half.
    for actual, expected in (
            (inputs[:num, 2], ones),
            (inputs[:num, 3], zeros),
            (outputs[:num, 0], ones),
            (outputs[:num, 1], zeros),
            (inputs[num:, 2], zeros),
            (inputs[num:, 3], ones),
            (outputs[num:, 0], zeros),
            (outputs[num:, 1], ones),
    ):
        np.testing.assert_array_equal(actual, expected)

    ## Shufflers from groups specified by compound and batch to output
    ## labels specified by batch, restricted to the control groups.
    holder.add_shufflers(["compound", "batch"], ["batch"],
                         [("control", "batch1"), ("control", "batch2")])
    inputs, outputs = dense_pair((("control", "batch1"), "batch"))
    ## A single (compound, batch) group holds self.num rows.
    self.assertEqual(inputs.shape, (num, 4))
    ## Only one output label is possible since both batch and compound
    ## have been fixed.
    self.assertEqual(outputs.shape, (num, 1))