def test_return_dataset_of_size_n(self):
    # The picker must hand back exactly as many APKs as were requested
    # when the source dataset holds more than that.
    picker = RandomPicker()
    three_apks = Dataset(Apk('apk1'), Apk('apk2'), Apk('apk3'))
    picked = picker.get_random_subset(three_apks, 2)
    self.assertEqual(len(picked), 2)

    # Same check with a fresh picker, a larger dataset and a larger request.
    picker = RandomPicker()
    four_apks = Dataset(Apk('apk1'), Apk('apk2'), Apk('apk3'), Apk('apk4'))
    picked = picker.get_random_subset(four_apks, 3)
    self.assertEqual(len(picked), 3)
def test_return_subset_of_dataset(self):
    # Whatever the picker returns must be reported as contained in the
    # dataset it was drawn from (checked through Dataset.contains).
    apk_names = ('apk1', 'apk2', 'apk3', 'apk4', 'apk5', 'apk6')
    initial_dataset = Dataset(*[Apk(name) for name in apk_names])

    picked = RandomPicker().get_random_subset(initial_dataset, 4)
    self.assertTrue(initial_dataset.contains(picked))

    # Second draw with a fresh picker; here the size is verified as well.
    picked = RandomPicker().get_random_subset(initial_dataset, 2)
    self.assertTrue(initial_dataset.contains(picked))
    self.assertEqual(len(picked), 2)
def test_return_random_subset(self):
    # A picker created with seed=12 is expected to produce these exact
    # subsets on three consecutive draws from the same six-APK dataset.
    # The draws must happen in this order on the same picker instance.
    initial_dataset = Dataset(Apk('apk1'), Apk('apk2'), Apk('apk3'),
                              Apk('apk4'), Apk('apk5'), Apk('apk6'))
    picker = RandomPicker(seed=12)

    expected_draws = (
        ('apk3', 'apk4', 'apk1'),
        ('apk2', 'apk5', 'apk6'),
        ('apk1', 'apk4', 'apk3'),
    )
    for expected_names in expected_draws:
        drawn = picker.get_random_subset(initial_dataset, 3)
        expected = Dataset(*[Apk(name) for name in expected_names])
        self.assertEqual(drawn, expected)
def setUp(self):
    """Create a two-APK dataset and a downloader using a mocked URL constructor."""
    first_apk = Apk(sha256='1234', pkg_name='apk1')
    second_apk = Apk(sha256='5678', pkg_name='apk2')
    self.apk, self.apk2 = first_apk, second_apk
    self.dataset = Dataset(first_apk, second_apk)

    def fake_download_url(apk):
        # Use the sha256 of the apk as its download url, just for a test.
        return ' https://' + apk.sha256

    url_constructor = mock.create_autospec(UrlConstructor)
    url_constructor.construct.side_effect = fake_download_url
    self.constructor_mock = url_constructor

    self.dataset_downloader = DatasetDownloader(
        self.dataset, url_constructor=url_constructor, out_dir='out')
def test_saves_metadata(self):
    # Saving four metadata columns must create out/metadata.csv holding a
    # header row followed by one row per APK.
    first_apk = Apk(pkg_name='apk1', apk_size=8, dex_date='01-01-2001',
                    markets='play')
    second_apk = Apk(pkg_name='apk2', apk_size=13, dex_date='01-03-2001',
                     markets='play|china')
    dataset = Dataset(first_apk, second_apk)

    columns = ['pkg_name', 'apk_size', 'dex_date', 'markets']
    saver = MetadataSaver(dataset=dataset, out_dir='out')
    saver.save(columns)

    # Note: the second APK is created with markets 'play|china' but the
    # file is expected to contain 'china|play' (presumably the saver
    # orders the markets -- confirm against MetadataSaver).
    expected_out = ('pkg_name,apk_size,dex_date,markets\n'
                    'apk1,8,01-01-2001,play\n'
                    'apk2,13,01-03-2001,china|play\n')

    self.assertTrue(os.path.exists(r'out/metadata.csv'))
    with open('out/metadata.csv') as f:
        self.assertEqual(expected_out, f.read())
def get_random_subset(self, input_dataset, requested_size):
    """Pick a random subset of ``input_dataset`` in a single pass.

    The first ``requested_size`` items are kept as they arrive. Every
    later item draws a slot index in ``[0, items seen so far)`` and
    overwrites that slot only when the index falls inside the kept
    items (reservoir sampling). Randomness comes from the module-level
    ``random.random()``, so the result depends on the global random state.

    Args:
        input_dataset: iterable of APKs to sample from.
        requested_size: number of items to keep, or ``DownloadType.ALL``
            to keep every item.

    Returns:
        A ``Dataset`` built from the kept items. When the input holds
        fewer items than ``requested_size``, all of them are returned.
    """
    reservoir = []
    for seen_count, candidate in enumerate(input_dataset, 1):
        still_filling = (requested_size is DownloadType.ALL
                         or len(reservoir) < requested_size)
        if still_filling:
            reservoir.append(candidate)
            continue

        # Reservoir is full: maybe evict one of the kept items.
        slot = int(random.random() * seen_count)
        if slot < requested_size:
            reservoir[slot] = candidate

    return Dataset(*reservoir)
def test_saves_metadata(self):
    # Saving metadata for a dataset must create metadata.csv inside the
    # output directory.
    dataset = Dataset(
        Apk(pkg_name='apk1', apk_size=8, dex_date='01-01-2001',
            markets='play'),
        Apk(pkg_name='apk2', apk_size=13, dex_date='01-03-2001',
            markets='play|china'))
    MetadataSaver(dataset=dataset,
                  out_dir='out').save(['pkg_name', 'apk_size', 'dex_date'])

    # Build the path with os.path.join instead of a hard-coded backslash:
    # on POSIX 'out\metadata.csv' is a single file name containing a
    # backslash, so the existence check could never succeed there.
    metadata_path = os.path.join('out', 'metadata.csv')
    self.assertTrue(os.path.exists(metadata_path))
def test_equals(self):
    # Two empty datasets compare equal.
    self.assertEqual(Dataset(), Dataset())

    # A dataset built from the same five APK names as the fixture must be
    # equal to it and share its hash.
    fixture_names = ('apk1', 'apk2', 'apk3', 'apk4', 'apk5')
    twin = Dataset(*[Apk(name) for name in fixture_names])
    self.assertEqual(self.dataset, twin)
    self.assertEqual(hash(self.dataset), hash(twin))

    # Once an extra APK is added the hashes must diverge.
    twin.add(Apk('apk6'))
    self.assertNotEqual(hash(self.dataset), hash(twin))
def test_returns_whole_dataset_if_dataset_size_less_then_requested_size(
        self):
    # Requesting 4 APKs from a dataset of only 2 must give back a dataset
    # equal to the input.
    two_apks = Dataset(Apk('apk1'), Apk('apk2'))
    picker = RandomPicker()
    picked = picker.get_random_subset(two_apks, 4)
    self.assertEqual(picked, two_apks)
def setUp(self):
    """Create the five-APK dataset ('apk1' .. 'apk5') shared by the tests."""
    apk_names = ('apk1', 'apk2', 'apk3', 'apk4', 'apk5')
    self.dataset = Dataset(*[Apk(name) for name in apk_names])