def test_unicode(self):
    """Write a unicode string to a MockTarget and read the same text back."""
    target = MockTarget("foo")
    with target.open('w') as writer:
        writer.write(u"bar")
    with target.open('r') as reader:
        content = reader.read()
    self.assertEqual(content, u'bar')
def test_post_transformation(
        self, fact_mock, output_mock, requests_get_mock):
    """Run FetchFbPosts on canned data and compare with the expected CSV.

    The facts and output targets are replaced by MockTargets, and the
    mocked ``get`` answers with the content of ``post_actual.json``.
    """
    fact_target = MockTarget('facts_in', format=UTF8)
    output_target = MockTarget('post_out', format=UTF8)
    fact_mock.return_value = fact_target
    output_mock.return_value = output_target

    with open(f'{FB_TEST_DATA}/post_actual.json',
              'r', encoding='utf-8') as actual_file:
        api_payload = actual_file.read()
    with open(f'{FB_TEST_DATA}/post_expected.csv',
              'r', encoding='utf-8') as expected_file:
        expected_csv = expected_file.read()

    # Overwrite requests 'get' return value to provide our test data;
    # the payload is parsed anew on every .json() call
    requests_get_mock.return_value = MagicMock(
        ok=True, json=lambda: json.loads(api_payload))

    facebook.MuseumFacts().run()
    facebook.FetchFbPosts().run()

    with output_target.open('r') as result_file:
        self.assertEqual(expected_csv, result_file.read())
def test_with(self):
    """Iterating a MockTarget opened via ``with`` yields the written text."""
    target = MockTarget("foo")
    with target.open('w') as sink:
        sink.write("bar")
    with target.open('r') as source:
        chunks = list(source)
    self.assertEqual(chunks, ['bar'])
def test_pagination(self, fact_mock, output_mock, requests_get_mock):
    """Check that FetchFbPosts issues one request per queued response.

    Two mocked responses (``post_next.json`` and ``post_previous.json``)
    are queued as the side effect of the patched ``get``; the test passes
    when exactly two calls were made.
    """
    fact_target = MockTarget('facts_in', format=UTF8)
    fact_mock.return_value = fact_target
    output_target = MockTarget('post_out', format=UTF8)
    output_mock.return_value = output_target

    # Read the fixtures as UTF-8 explicitly so that decoding does not
    # depend on the platform's default encoding
    with open(f'{FB_TEST_DATA}/post_next.json',
              'r', encoding='utf-8') as next_data_in:
        next_data = next_data_in.read()
    with open(f'{FB_TEST_DATA}/post_previous.json',
              'r', encoding='utf-8') as previous_data_in:
        previous_data = previous_data_in.read()

    def next_json():
        return json.loads(next_data)

    def previous_json():
        return json.loads(previous_data)

    next_response = MagicMock(ok=True, json=next_json)
    previous_response = MagicMock(ok=True, json=previous_json)

    # Responses are handed out in this order, one per call
    requests_get_mock.side_effect = [next_response, previous_response]

    facebook.MuseumFacts().run()
    facebook.FetchFbPosts().run()

    self.assertEqual(requests_get_mock.call_count, 2)
def test_two(self, input_mock, output_mock):
    """ConcatCsvs must stack two input CSVs into a single output table."""
    df_in0 = pd.DataFrame([[1, 'foo'], [2, 'bar']], columns=['a', 'b'])
    df_in1 = pd.DataFrame(
        [[42, 'spam'], [1337, 'häm']], columns=['a', 'b'])

    # Bind each frame as a default argument: a plain closure looks ``df``
    # up only when it is called, so a deferred call would write the last
    # frame into both targets
    input_mock.return_value = [
        self.install_mock_target(
            MagicMock(),
            lambda file, df=df: df.to_csv(file, index=False))
        for df in [df_in0, df_in1]
    ]
    output_target = MockTarget(str(self), format=UTF8)
    output_mock.return_value = output_target

    self.task = ConcatCsvs()
    self.run_task(self.task)

    with output_target.open('r') as output:
        df_out = pd.read_csv(output)
    df_expected = pd.DataFrame([
        [1, 'foo'],
        [2, 'bar'],
        [42, 'spam'],
        [1337, 'häm'],
    ], columns=['a', 'b'])
    pd.testing.assert_frame_equal(df_expected, df_out)
def test_default_mode_value(self):
    """``open()`` called without a mode must read the written content."""
    target = MockTarget("foo")
    with target.open('w') as writer:
        writer.write("bar")
    # No mode argument on purpose
    with target.open() as reader:
        chunks = list(reader)
    self.assertEqual(chunks, ['bar'])
def test_bytes(self):
    """Bytes written through a ``Nop``-format target are read back as bytes."""
    target = MockTarget("foo", format=Nop)
    with target.open('wb') as writer:
        writer.write(b"bar")
    with target.open('rb') as reader:
        chunks = list(reader)
    self.assertEqual(chunks, [b'bar'])
def test_audience_gender_age_transformation(
        self, input_mock, output_mock, request_mock):
    """Run FetchIgAudienceGenderAge on canned data and compare the CSV.

    The mocked request answers with ``audience_gender_age_actual.json``;
    the task output must equal ``audience_gender_age_expected.csv``.
    """
    fact_target = MockTarget('facts_in', format=UTF8)
    output_target = MockTarget('post_out', format=UTF8)
    input_mock.return_value = fact_target
    output_mock.return_value = output_target

    with open(f'{IG_TEST_DATA}/audience_gender_age_actual.json',
              'r', encoding='utf-8') as actual_file:
        api_payload = actual_file.read()
    with open(f'{IG_TEST_DATA}/audience_gender_age_expected.csv',
              'r', encoding='utf-8') as expected_file:
        expected_csv = expected_file.read()

    # Every .json() call on the mocked response parses the payload afresh
    request_mock.return_value = MagicMock(
        ok=True, json=lambda: json.loads(api_payload))

    instagram.MuseumFacts().run()
    with freeze_time('2020-01-01 00:00:05'):
        column_names = [
            col[0]
            for col in instagram.IgAudienceGenderAgeToDb().columns
        ]
        instagram.FetchIgAudienceGenderAge(columns=column_names).run()

    with output_target.open('r') as result_file:
        self.assertEqual(result_file.read(), expected_csv)
def test_fetch_twitter(self, output_mock):
    """The FetchTwitter output for 2020-02-06 must contain a known tweet."""
    output_target = MockTarget('raw_out', format=UTF8)
    output_mock.return_value = output_target

    # Dirty workaround for pandas's peculiarities regarding default values
    missing = object()
    expected_tweet = {
        'tweet_id': 1225435275301654531,
        'text': "#MuseumBarberini is cool!",
        'user_id': 1189538451097608193,
        'parent_tweet_id': missing,
        'timestamp': '2020-02-06 16:05:11+01:00'
    }

    # On this day our team's account has posted a related tweet
    # See https://twitter.com/bpfn2020/status/1225435275301654531
    with freeze_time('2020-02-06'):
        FetchTwitter(timespan=dt.timedelta(days=1)).run()

    with output_target.open('r') as output_file:
        output_df = pd.read_csv(output_file)

    # Narrow the rows down one expected field at a time, so that a failure
    # names the first field that no remaining row satisfies
    remaining = output_df.fillna(missing)
    for key, value in expected_tweet.items():
        remaining = remaining[remaining[key] == value]
        self.assertTrue(
            len(remaining) >= 1,
            f"Did not find any tweet with {key} = {value}")
def test_create_corpus(self, output_mock):
    """TopicModelingCreateCorpus must pickle a corpus of two ``Doc`` items.

    One tweet and two Facebook comments are inserted beforehand; the test
    expects a corpus of length 2 whose first two entries are ``Doc``s.
    """
    # -------- SET UP MOCK DATA ------------
    output_target = MockTarget('corpus_out', format=luigi.format.Nop)
    output_mock.return_value = output_target

    insert_tweet = (
        " INSERT INTO tweet(user_id,tweet_id,text,response_to,post_date)"
        " VALUES ('user_id', 'tweet_id', 'tweet text', NULL,"
        " '2020-05-24 10:56:21') "
    )
    insert_comments = (
        " INSERT INTO fb_post_comment(post_id,comment_id,post_date,"
        " text,is_from_museum,response_to)"
        " VALUES ('post1','comment1','2020-05-24 10:56:21',"
        " 'text1',false,NULL),"
        " ('post2','comment2','2018-05-24 10:56:21',"
        " 'text2',true,NULL) "
    )
    self.db_connector.execute(insert_tweet, insert_comments)

    # ------- RUN TASK UNDER TEST --------
    TopicModelingCreateCorpus().run()

    # ------- INSPECT OUTPUT -------
    with output_target.open("r") as fp:
        corpus = pickle.load(fp)  # nosec
    self.assertEqual(len(corpus), 2)
    for index in (0, 1):
        self.assertIsInstance(corpus[index], Doc)
def test_exhibition_times(self, output_mock, requests_get_mock):
    """Run FetchExhibitionTimes on canned data and compare the CSV output."""
    output_target = MockTarget('exhibition_out', format=UTF8)
    output_mock.return_value = output_target

    data_dir = 'tests/test_data/gomus/exhibitions'
    with open(f'{data_dir}/exhibitions_actual.json',
              'r', encoding='utf-8') as actual_file:
        api_payload = actual_file.read()
    with open(f'{data_dir}/exhibition_times_expected.csv',
              'r', encoding='utf-8') as expected_file:
        expected_csv = expected_file.read()

    # Overwrite requests 'get' return value to provide our test data
    requests_get_mock.return_value = MagicMock(
        ok=True, json=lambda: json.loads(api_payload))

    FetchExhibitionTimes().run()

    with output_target.open('r') as result_file:
        self.assertEqual(expected_csv, result_file.read())
def test_empty_tweet_performance(self, output_mock, raw_tweets_mock):
    """ExtractTweetPerformance on the empty fixture yields the empty CSV."""
    output_target = MockTarget('perform_extracted_out', format=UTF8)
    output_mock.return_value = output_target

    data_dir = 'tests/test_data/twitter'
    with open(f'{data_dir}/empty_raw_tweets.csv',
              'r', encoding='utf-8') as raw_file:
        raw_tweets = raw_file.read()
    with open(f'{data_dir}/empty_tweet_performance.csv',
              'r', encoding='utf-8') as expected_file:
        expected_performance = expected_file.read()

    def write_raw_tweets(file):
        return file.write(raw_tweets)

    self.install_mock_target(raw_tweets_mock, write_raw_tweets)

    ExtractTweetPerformance(table='tweet_performance').run()

    with output_target.open('r') as output_file:
        output = output_file.read()
    self.assertEqual(output, expected_performance)
def test_extract_tweet_performance(self, output_mock, raw_tweets_mock):
    """Compare ExtractTweetPerformance output with the expected CSV.

    The first line must match exactly; for lines 1 and 2 the last
    ``;``-separated field (the timestamp) is ignored.
    """
    self.db_connector.execute(
        " INSERT INTO tweet (tweet_id) VALUES"
        " ('1234567890123456789'),"
        " ('111111111111111111'),"
        " ('2222222222222222222') "
    )

    output_target = MockTarget('perform_extracted_out', format=UTF8)
    output_mock.return_value = output_target

    data_dir = 'tests/test_data/twitter'
    with open(f'{data_dir}/raw_tweets.csv',
              'r', encoding='utf-8') as raw_file:
        raw_tweets = raw_file.read()
    with open(f'{data_dir}/expected_tweet_performance.csv',
              'r', encoding='utf-8') as expected_file:
        expected_performance = expected_file.read()

    self.install_mock_target(
        raw_tweets_mock, lambda file: file.write(raw_tweets))

    ExtractTweetPerformance(table='tweet_performance').run()

    with output_target.open('r') as output_file:
        output = output_file.read()

    actual_lines = output.split('\n')
    expected_lines = expected_performance.split('\n')
    self.assertEqual(actual_lines[0], expected_lines[0])
    for row in (1, 2):
        # cutting away the timestamp
        self.assertEqual(
            actual_lines[row].split(';')[:-1],
            expected_lines[row].split(';')[:-1])
def test_pagination(self, fact_mock, output_mock, request_mock):
    """Check that FetchIgPosts issues one request per queued response.

    Two mocked responses (``post_next.json`` and ``post_previous.json``)
    are queued as the side effect of the patched request; the test passes
    when exactly two calls were made.
    """
    # This is very similar to test_facebook.test_pagination
    fact_target = MockTarget('facts_in', format=UTF8)
    fact_mock.return_value = fact_target
    output_target = MockTarget('post_out', format=UTF8)
    output_mock.return_value = output_target

    # Read the fixtures as UTF-8 explicitly so that decoding does not
    # depend on the platform's default encoding
    with open(f'{IG_TEST_DATA}/post_next.json',
              'r', encoding='utf-8') as next_data_in:
        next_data = next_data_in.read()
    with open(f'{IG_TEST_DATA}/post_previous.json',
              'r', encoding='utf-8') as previous_data_in:
        previous_data = previous_data_in.read()

    def next_json():
        return json.loads(next_data)

    def previous_json():
        return json.loads(previous_data)

    next_response = MagicMock(ok=True, json=next_json)
    previous_response = MagicMock(ok=True, json=previous_json)

    # Responses are handed out in this order, one per call
    request_mock.side_effect = [next_response, previous_response]

    self.run_task(instagram.FetchIgPosts())

    self.assertEqual(request_mock.call_count, 2)
class TestInfo(unittest.TestCase):
    """Tests for the task tree text returned by ``gokart.info.make_tree_info``.

    In both tests ``luigi.LocalTarget`` is patched with a factory that builds
    a ``MockTarget`` from the same arguments.
    """

    def setUp(self) -> None:
        # Clear the mock file system before each test
        MockFileSystem().clear()

    @patch('luigi.LocalTarget', new=lambda path, **kwargs: MockTarget(path, **kwargs))
    def test_make_tree_info_pending(self):
        """Without running anything, both tasks must show up as PENDING."""
        task = _Task(param=1, sub=_SubTask(param=2))

        # check before running
        tree = gokart.info.make_tree_info(task)
        # NOTE(review): this is a regex handed to assertRegex, so the
        # whitespace inside the literal is significant -- confirm it mirrors
        # the layout produced by make_tree_info
        expected = r""" └─-\(PENDING\) _Task\[[a-z0-9]*\] └─-\(PENDING\) _SubTask\[[a-z0-9]*\]"""
        self.assertRegex(tree, expected)

    @patch('luigi.LocalTarget', new=lambda path, **kwargs: MockTarget(path, **kwargs))
    def test_make_tree_info_complete(self):
        """After ``luigi.build`` both tasks must show up as COMPLETE."""
        task = _Task(param=1, sub=_SubTask(param=2))

        # check after sub task runs
        luigi.build([task], local_scheduler=True)
        tree = gokart.info.make_tree_info(task)
        # NOTE(review): whitespace inside the regex literal is significant --
        # confirm it mirrors the layout produced by make_tree_info
        expected = r""" └─-\(COMPLETE\) _Task\[[a-z0-9]*\] └─-\(COMPLETE\) _SubTask\[[a-z0-9]*\]"""
        self.assertRegex(tree, expected)
def test_fetch_total_profile_metrics(
        self, fact_mock, output_mock, request_mock):
    """Run FetchIgTotalProfileMetrics on canned data and compare the CSV.

    The mocked request answers with ``total_profile_metrics_actual.json``;
    the task output must equal ``total_profile_metrics_expected.csv``.
    """
    fact_target = MockTarget('facts_in', format=UTF8)
    output_target = MockTarget('post_out', format=UTF8)
    fact_mock.return_value = fact_target
    output_mock.return_value = output_target

    with open(f'{IG_TEST_DATA}/total_profile_metrics_actual.json',
              'r', encoding='utf-8') as actual_file:
        api_payload = actual_file.read()
    with open(f'{IG_TEST_DATA}/total_profile_metrics_expected.csv',
              'r', encoding='utf-8') as expected_file:
        expected_csv = expected_file.read()

    # Every .json() call on the mocked response parses the payload afresh
    request_mock.return_value = MagicMock(
        ok=True, json=lambda: json.loads(api_payload))

    with freeze_time('2020-01-01 00:00:05'):
        column_names = [
            col[0]
            for col in instagram.IgTotalProfileMetricsToDb().columns
        ]
        self.run_task(
            instagram.FetchIgTotalProfileMetrics(columns=column_names))

    with output_target.open('r') as result_file:
        self.assertEqual(result_file.read(), expected_csv)
def test_1(self):
    """Write with explicit ``open``/``close`` and read the line back."""
    target = MockTarget('test')

    # Explicit open/close (no ``with``) on both handles
    writer = target.open('w')
    print('test', file=writer)
    writer.close()

    reader = target.open('r')
    self.assertEqual(list(reader), ['test\n'])
    reader.close()
def output(self):
    """Return the target(s) derived from ``self.dh``.

    Even hours give a single MockTarget at ``/even/%Y%m%d%H``; odd hours
    give a dict with an ``'spi'`` and an ``'spl'`` MockTarget below that
    path.
    """
    base = self.dh.strftime('/even/%Y%m%d%H')
    if self.dh.hour % 2 != 0:
        return {
            'spi': MockTarget(base + '/something.spi'),
            'spl': MockTarget(base + '/something.spl'),
        }
    return MockTarget(base)
def test_app_id(self, input_mock):
    """``FetchGplayReviews().app_id`` must equal the appId in the facts."""
    facts_target = MockTarget('museum_facts', format=UTF8)
    input_mock.return_value = facts_target

    facts = {'ids': {'gplay': {'appId': 'some ID'}}}
    with facts_target.open('w') as fp:
        json.dump(facts, fp)

    self.task._app_id = None
    self.assertEqual(FetchGplayReviews().app_id, 'some ID')
def test_DownloadBotTemplate(self):
    """Building the task must make its (mocked) output target exist."""
    # Mock target standing in for the task's real output
    fake_output = MockTarget("DownloadBotTemplate", format=format.Nop)

    # Subclass the task so that only output() is redirected
    class MockDownloadBotTemplate(DownloadBotTemplate):
        def output(self):
            return fake_output

    # Nothing there before the build, something there afterwards
    self.assertFalse(fake_output.exists())
    build([MockDownloadBotTemplate()], local_scheduler=True)
    self.assertTrue(fake_output.exists())
def test_one(self, input_mock, output_mock):
    """A single input CSV must come out of ConcatCsvs unchanged."""
    df_in = pd.DataFrame([[1, 'foo'], [2, 'bar']], columns=['a', 'b'])

    def write_input(file):
        return df_in.to_csv(file, index=False)

    self.install_mock_target(input_mock, write_input)
    output_target = MockTarget(str(self))
    output_mock.return_value = output_target

    self.task = ConcatCsvs()
    self.run_task(self.task)

    with output_target.open('r') as output:
        df_out = pd.read_csv(output)
    pd.testing.assert_frame_equal(df_in, df_out)
def test_deterministic(self):
    """``DataDump.latest`` must keep returning the first dump's date.

    The lookup is repeated after a second target has been created and with
    the keyword arguments given in a different order.
    """
    MockTarget('data-pqr-zebra-Congo-2012-01-01').open('w').close()
    d = DataDump.latest(
        date=datetime.date(2012, 1, 10), param='pqr', a='zebra', aa='Congo')
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(d.date, datetime.date(2012, 1, 1))

    MockTarget('data-pqr-zebra-Congo-2012-01-05').open('w').close()
    d = DataDump.latest(
        date=datetime.date(2012, 1, 10), param='pqr', aa='Congo', a='zebra')
    # Should still be the same
    self.assertEqual(d.date, datetime.date(2012, 1, 1))
class FirstTask(Task):
    """Example task that uses ``b.FirstBatch`` and its own mock target."""

    # Batch instance assigned to this task
    batch = b.FirstBatch()
    # Mock target assigned to this task
    target = MockTarget('first_task.txt')
def test_DownloadHtml(self):
    """Building the task must make its (mocked) output target exist."""
    # Mock target standing in for the task's real output
    fake_output = MockTarget("DownloadHTMLTemplate", format=format.Nop)

    # Inheritance lets us swap out only output() and keep the rest of the
    # task as it is
    class MockDownloadHtml(DownloadHTMLTemplate):
        def output(self):
            return fake_output

    # the target must not exist before the task has run ...
    self.assertFalse(fake_output.exists())

    build([MockDownloadHtml()], local_scheduler=True)

    # ... and must exist afterwards
    self.assertTrue(fake_output.exists())
def test_thumbnails_to_db(self, output_mock, uri_mock, to_db_mock):
    """IgPostThumbnailsToDb must fill in the missing thumbnail URIs.

    Afterwards the permalink/thumbnail pairs in the database must equal
    the fixture, and the URI lookup must have been called once per post
    that started without a thumbnail.
    """
    thumbnails = pd.read_csv(f'{IG_TEST_DATA}/post_thumbnails.csv')
    post_data = pd.read_csv(f'{IG_TEST_DATA}/post_expected.csv')

    # Prepare database with posts (some with, others without a thumbnail)
    merged = thumbnails.merge(post_data, on='permalink')
    post_data['thumbnail_uri'] = merged['thumbnail_uri']
    is_last_row = post_data.index == len(post_data) - 1
    post_data.loc[is_last_row, 'thumbnail_uri'] = None
    seed_task = DummyWriteCsvToDb(
        table=instagram.IgPostsToDb.table,
        csv=post_data.to_csv(index=False))
    self.run_task(seed_task)

    # Mock get_thumbnail_uri() to answer mocked URIs directly
    output_target = MockTarget('post_out')
    output_mock.return_value = output_target

    def lookup_uri(permalink):
        matching = thumbnails[thumbnails['permalink'] == permalink]
        return matching['thumbnail_uri'].values[0]

    uri_mock.side_effect = lookup_uri
    to_db_mock.return_value = True

    # Let's go!
    self.run_task(instagram.IgPostThumbnailsToDb())

    # Kept on one physical line: the text contains a `--` SQL comment, so
    # moving line breaks around would change what is commented out
    query = f''' SELECT permalink, thumbnail_uri FROM {instagram.IgPostsToDb.table} -- # nosec - constant '''
    actual_data = pd.DataFrame(
        self.db_connector.query(query),
        columns=['permalink', 'thumbnail_uri'])

    pd.testing.assert_frame_equal(
        actual_data, thumbnails[['permalink', 'thumbnail_uri']])
    self.assertEqual(
        uri_mock.call_count, post_data['thumbnail_uri'].isna().sum())
def prepare_input_target(self, input_mock, infile):
    """Create the ``data_in`` mock target, wire it in and fill it.

    ``input_mock`` is set to return an iterator over the new target, and
    ``infile`` is passed on to ``write_file_to_target``.
    """
    target = MockTarget('data_in', format=UTF8)

    # FetchGomusReport returns iterable, to simulate this for most tasks:
    targets = [target]
    input_mock.return_value = iter(targets)

    self.write_file_to_target(target, infile)
class Task(luigi.Task):
    """Base class shared by every concrete batch task."""

    task_namespace = 'batch'
    required_task = None
    batch = Batch
    target = MockTarget("task.txt")

    def requires(self):
        """Return whatever is stored in ``required_task``."""
        return self.required_task

    def run(self):
        """Run the batch, then write a greeting to the output target."""
        self.batch.run()
        greeting = f"{self.__class__.__name__} says: Hello world!"
        with self.output().open('w') as sink:
            sink.write(greeting)

    def output(self):
        """Return the target stored in ``target``."""
        return self.target
def test_download_image(self):
    """Run the download task against a mocked output and expect it to exist.

    NOTE(review): the ``"..."`` strings look like placeholders that still
    have to be replaced with a real target path and filename -- confirm.
    """
    with TemporaryDirectory() as tmpdir:
        my_fake_file = "asdf.jpg"
        # upload fake image file to s3 bucket

        mock_output = MockTarget("...")

        class MockDownloadImage(MyPSET4Task):
            # Override only output() through inheritance. It has to
            # return the very target that is checked below, otherwise the
            # assertion cannot observe what the task produced
            def output(self):
                return mock_output

        run_task(MockDownloadImage(filename="..."))
        self.assertTrue(mock_output.exists())
def get_output(self, fn):
    """Return the target for ``fn`` that matches ``self.exec_environment``.

    ``ExecEnv.HDFS`` gives an ``HdfsTarget``, ``ExecEnv.MOCK`` a
    ``MockTarget`` and every other value a ``luigi.LocalTarget``.
    """
    if self.exec_environment == ExecEnv.HDFS:
        return luigi.contrib.hdfs.HdfsTarget(fn)
    if self.exec_environment == ExecEnv.MOCK:
        return MockTarget(fn)
    # Fall back to the local file system
    return luigi.LocalTarget(fn)
def get_output(self, fn):
    """Return an HDFS target under ``/tmp/`` or a MockTarget for ``fn``.

    ``self.use_hdfs`` selects the HDFS variant, which combines the default
    format with ``luigi.hdfs.PlainDir``.
    """
    if not self.use_hdfs:
        return MockTarget(fn)
    return luigi.hdfs.HdfsTarget(
        '/tmp/' + fn,
        format=luigi.format.get_default_format() >> luigi.hdfs.PlainDir)
def test_mode_none_error(self):
    """Opening a MockTarget with mode ``None`` must raise ``TypeError``."""
    target = MockTarget("foo")
    # assertRaises is entered first, so the open() call itself is covered
    with self.assertRaises(TypeError), target.open(None) as handle:
        handle.write("bar")
def _touch(self, path):
    """Open the MockTarget at ``path`` for writing and close it unwritten."""
    with MockTarget(path).open('w'):
        # Nothing to write
        pass