def testDataShuffler2(self):
    ## Reproducibility check: two shufflers built over the same data with
    ## identically seeded random states must hand back identical batches.
    seed = 30
    batch_size = 200
    features = np.arange(200)
    targets = np.arange(500, 700)

    ## Build both shufflers before drawing from either one; each gets its own
    ## freshly seeded RandomState so neither consumes the other's stream.
    shufflers = [
        forgetting_nuisance.DataShuffler(
            features, targets, np.random.RandomState(seed))
        for _ in range(2)
    ]
    batches = [shuffler.next_batch(batch_size) for shuffler in shufflers]
    (first_inputs, first_outputs), (second_inputs, second_outputs) = batches

    self.assertTrue(np.array_equal(first_inputs, second_inputs))
    self.assertTrue(np.array_equal(first_outputs, second_outputs))
def setUp(self):
    ## Builds the fixtures shared by the tests in this class:
    ##   self.num          number of samples drawn per group.
    ##   self.random_state RandomState seeded with 42, handed to the shuffler.
    ##   self.shuffler     DataShuffler over control/drug samples with one-hot
    ##                     outputs.
    ##   self.dummy_df     DataFrame of control/drug samples from two batches,
    ##                     indexed by a (compound, batch) MultiIndex.
    ## NOTE(review): the multivariate_normal draws below use NumPy's global
    ## RNG, which is not seeded in this method, so the sampled values differ
    ## between runs unless it is seeded elsewhere.
    super(ForgettingNuisanceTest, self).setUp()
    self.num = 10

    ## Make some data with controls versus drug. self.C is defined elsewhere
    ## in this class; its result is used as the covariance argument.
    control = np.random.multivariate_normal(
        [-10, 0], self.C(10, 2, np.pi / 6), self.num)
    drug = np.random.multivariate_normal(
        [-20, 0], self.C(10, 2, np.pi / 6), self.num)
    inputs = np.vstack((control, drug))

    ## One-hot outputs: column 0 marks the control rows (the first self.num),
    ## column 1 marks the drug rows (the remainder).
    outputs = np.zeros((inputs.shape[0], 2))
    outputs[:self.num, 0] = 1
    outputs[self.num:, 1] = 1

    self.random_state = np.random.RandomState(seed=42)
    self.shuffler = forgetting_nuisance.DataShuffler(
        inputs, outputs, self.random_state)

    ## Make data with control versus drug on two separate batches.
    controls_batch_1 = np.random.multivariate_normal(
        [0, 0], self.C(1, 1, 0), self.num)
    controls_batch_2 = np.random.multivariate_normal(
        [10, 0], self.C(1, 1, 0), self.num)
    ## Drug samples are the control samples shifted by a fixed per-batch
    ## offset (the list is broadcast across every row).
    drug_batch_1 = controls_batch_1 + [3, 2]
    drug_batch_2 = controls_batch_2 + [3, -2]

    ## One (compound, batch) label per row, in the same order the arrays are
    ## stacked below. Each group is sized from its own array so the index
    ## stays aligned with the data even if the group sizes ever differ.
    labels = (
        [("control", "batch1")] * len(controls_batch_1)
        + [("control", "batch2")] * len(controls_batch_2)
        + [("drug", "batch1")] * len(drug_batch_1)
        + [("drug", "batch2")] * len(drug_batch_2)
    )
    self.dummy_df = pd.DataFrame(
        np.vstack(
            [controls_batch_1, controls_batch_2, drug_batch_1, drug_batch_2]),
        index=pd.MultiIndex.from_tuples(labels, names=["compound", "batch"]))
def testDataShuffler3(self):
    ## Exercise the shuffler with a batch size (200) far larger than the
    ## number of input/output pairs (4).
    features = np.arange(4)
    targets = np.arange(10, 14)
    shuffler = forgetting_nuisance.DataShuffler(
        features, targets, np.random.RandomState(30))

    oversized_batch = 200
    first_inputs, first_outputs = shuffler.next_batch(oversized_batch)
    second_inputs, second_outputs = shuffler.next_batch(oversized_batch)

    ## Two consecutive batches must contain the same elements with the same
    ## multiplicities, regardless of order.
    self.assertCountEqual(first_inputs, second_inputs)
    self.assertCountEqual(first_outputs, second_outputs)