コード例 #1
0
    def test_unicode(self):
        """A unicode string written to a MockTarget is read back unchanged."""
        target = MockTarget("foo")

        with target.open('w') as writer:
            writer.write(u"bar")

        with target.open('r') as reader:
            content = reader.read()
        self.assertEqual(content, u'bar')
コード例 #2
0
    def test_post_transformation(self, fact_mock, output_mock,
                                 requests_get_mock):
        """Feed canned JSON to ``FetchFbPosts`` and compare the written CSV.

        The three mock arguments are presumably injected by ``patch``
        decorators that are outside this view.
        """
        facts = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = facts
        posts_out = MockTarget('post_out', format=UTF8)
        output_mock.return_value = posts_out

        actual_path = f'{FB_TEST_DATA}/post_actual.json'
        with open(actual_path, 'r', encoding='utf-8') as json_file:
            raw_json = json_file.read()

        expected_path = f'{FB_TEST_DATA}/post_expected.csv'
        with open(expected_path, 'r', encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        # Every mocked GET answers with the canned JSON document.
        requests_get_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(raw_json))

        facebook.MuseumFacts().run()
        facebook.FetchFbPosts().run()

        with posts_out.open('r') as result_file:
            actual_csv = result_file.read()
        self.assertEqual(expected_csv, actual_csv)
コード例 #3
0
    def test_with(self):
        """Iterating a MockTarget reader yields the text written earlier."""
        target = MockTarget("foo")

        with target.open('w') as sink:
            sink.write("bar")

        with target.open('r') as source:
            lines = list(source)
        self.assertEqual(lines, ['bar'])
コード例 #4
0
    def test_pagination(self, fact_mock, output_mock, requests_get_mock):
        """Check that fetching Facebook posts issues exactly two GET requests.

        The mocked ``get`` answers first with the content of
        ``post_next.json`` and then with ``post_previous.json``.

        The mock arguments are presumably injected by ``patch`` decorators
        that are outside this view.
        """
        fact_target = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = fact_target
        output_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = output_target

        # Read the JSON fixtures as UTF-8 explicitly: the default encoding
        # of open() depends on the platform locale.
        with open(f'{FB_TEST_DATA}/post_next.json', 'r',
                  encoding='utf-8') as next_data_in:
            next_data = next_data_in.read()

        with open(f'{FB_TEST_DATA}/post_previous.json', 'r',
                  encoding='utf-8') as previous_data_in:
            previous_data = previous_data_in.read()

        def next_json():
            return json.loads(next_data)

        def previous_json():
            return json.loads(previous_data)

        next_response = MagicMock(ok=True, json=next_json)
        previous_response = MagicMock(ok=True, json=previous_json)

        # side_effect hands out one response per call, in this order.
        requests_get_mock.side_effect = [next_response, previous_response]

        facebook.MuseumFacts().run()
        facebook.FetchFbPosts().run()

        self.assertEqual(requests_get_mock.call_count, 2)
コード例 #5
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
    def test_unicode(self):
        """Write a unicode literal to the target and read the same text back."""
        mock_target = MockTarget("foo")

        with mock_target.open('w') as out_stream:
            out_stream.write(u"bar")

        with mock_target.open('r') as in_stream:
            text = in_stream.read()
        self.assertEqual(text, u'bar')
コード例 #6
0
    def test_two(self, input_mock, output_mock):
        """Run ``ConcatCsvs`` on two CSV inputs and check the combined frame.

        Each input frame is stored through ``self.install_mock_target``.
        The task output is read back with ``pd.read_csv`` and compared with
        the rows of both inputs stacked in order.

        The mock arguments are presumably injected by ``patch`` decorators
        that are outside this view.
        """

        df_in0 = pd.DataFrame([[1, 'foo'], [2, 'bar']], columns=['a', 'b'])
        df_in1 = pd.DataFrame([[42, 'spam'], [1337, 'häm']],
                              columns=['a', 'b'])
        # Bind each frame as a default argument. A plain closure over the
        # comprehension variable would only see the last frame if the
        # callback were ever invoked after the comprehension has finished.
        input_mock.return_value = [
            self.install_mock_target(
                MagicMock(),
                lambda file, df=df: df.to_csv(file, index=False))
            for df in [df_in0, df_in1]
        ]
        output_target = MockTarget(str(self), format=UTF8)
        output_mock.return_value = output_target

        self.task = ConcatCsvs()
        self.run_task(self.task)
        with output_target.open('r') as output:
            df_out = pd.read_csv(output)

        df_expected = pd.DataFrame(
            [
                [1, 'foo'],
                [2, 'bar'],
                [42, 'spam'],
                [1337, 'häm'],
            ],
            columns=['a', 'b'])
        pd.testing.assert_frame_equal(df_expected, df_out)
コード例 #7
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
    def test_default_mode_value(self):
        """``open()`` called without a mode can read what was written."""
        target = MockTarget("foo")

        with target.open('w') as writer:
            writer.write("bar")

        # No mode argument here: the default mode is what is under test.
        with target.open() as reader:
            lines = list(reader)
        self.assertEqual(lines, ['bar'])
コード例 #8
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
    def test_bytes(self):
        """Bytes written in 'wb' mode come back as bytes in 'rb' mode."""
        target = MockTarget("foo", format=Nop)

        with target.open('wb') as writer:
            writer.write(b"bar")

        with target.open('rb') as reader:
            chunks = list(reader)
        self.assertEqual(chunks, [b'bar'])
コード例 #9
0
    def test_audience_gender_age_transformation(self, input_mock, output_mock,
                                                request_mock):
        """Run ``FetchIgAudienceGenderAge`` on canned JSON and compare the CSV.

        The mock arguments are presumably injected by ``patch`` decorators
        that are outside this view.
        """
        facts = MockTarget('facts_in', format=UTF8)
        input_mock.return_value = facts
        result_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = result_target

        actual_path = f'{IG_TEST_DATA}/audience_gender_age_actual.json'
        with open(actual_path, 'r', encoding='utf-8') as json_file:
            raw_json = json_file.read()

        expected_path = f'{IG_TEST_DATA}/audience_gender_age_expected.csv'
        with open(expected_path, 'r', encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        # Every mocked request answers with the canned JSON document.
        request_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(raw_json))

        instagram.MuseumFacts().run()
        with freeze_time('2020-01-01 00:00:05'):
            column_names = [
                col[0] for col in instagram.IgAudienceGenderAgeToDb().columns
            ]
            fetch_task = instagram.FetchIgAudienceGenderAge(
                columns=column_names)
            fetch_task.run()

        with result_target.open('r') as result_file:
            actual_csv = result_file.read()
        self.assertEqual(actual_csv, expected_csv)
コード例 #10
0
    def test_fetch_twitter(self, output_mock):
        """Run ``FetchTwitter`` at a frozen date and look for one known tweet.

        The output CSV is narrowed down one expected field at a time, and
        after every step at least one row must remain.
        """
        raw_target = MockTarget('raw_out', format=UTF8)
        output_mock.return_value = raw_target

        # Unique placeholder standing in for missing values, so that empty
        # cells can be compared with ``==`` after ``fillna``.
        missing = object()
        expected_tweet = {
            'tweet_id': 1225435275301654531,
            'text': "#MuseumBarberini is cool!",
            'user_id': 1189538451097608193,
            'parent_tweet_id': missing,
            'timestamp': '2020-02-06 16:05:11+01:00'
        }

        # The team's account posted a related tweet on this day, see
        # https://twitter.com/bpfn2020/status/1225435275301654531
        with freeze_time('2020-02-06'):
            fetch_task = FetchTwitter(timespan=dt.timedelta(days=1))
            fetch_task.run()

        with raw_target.open('r') as csv_file:
            tweets = pd.read_csv(csv_file).fillna(missing)

        remaining = tweets
        for key, value in expected_tweet.items():
            remaining = remaining[remaining[key] == value]
            self.assertTrue(
                len(remaining) >= 1,
                f"Did not find any tweet with {key} = {value}")
コード例 #11
0
    def test_create_corpus(self, output_mock):
        """Insert one tweet and two comments, then inspect the corpus.

        The task output is unpickled and must hold exactly two entries,
        each of them a ``Doc`` instance.
        """

        # Route the task output to an in-memory binary target.
        corpus_target = MockTarget('corpus_out', format=luigi.format.Nop)
        output_mock.return_value = corpus_target

        # Seed the database with the rows the task reads.
        insert_tweet = '''
                INSERT INTO tweet(user_id,tweet_id,text,response_to,post_date)
                VALUES ('user_id', 'tweet_id', 'tweet text', NULL,
                        '2020-05-24 10:56:21')
            '''
        insert_comments = '''
                INSERT INTO fb_post_comment(post_id,comment_id,post_date,
                    text,is_from_museum,response_to)
                VALUES ('post1','comment1','2020-05-24 10:56:21',
                        'text1',false,NULL),
                    ('post2','comment2','2018-05-24 10:56:21',
                        'text2',true,NULL)
            '''
        self.db_connector.execute(insert_tweet, insert_comments)

        # Run the task under test.
        TopicModelingCreateCorpus().run()

        # Load the pickled corpus back; it was produced by the task above.
        with corpus_target.open("r") as fp:
            corpus = pickle.load(fp)  # nosec

        self.assertEqual(len(corpus), 2)
        for position in (0, 1):
            self.assertIsInstance(corpus[position], Doc)
コード例 #12
0
    def test_exhibition_times(self, output_mock, requests_get_mock):
        """Run ``FetchExhibitionTimes`` on canned JSON and compare the CSV.

        The mock arguments are presumably injected by ``patch`` decorators
        that are outside this view.
        """

        times_target = MockTarget('exhibition_out', format=UTF8)
        output_mock.return_value = times_target

        actual_path = (
            'tests/test_data/gomus/exhibitions/exhibitions_actual.json')
        with open(actual_path, 'r', encoding='utf-8') as json_file:
            raw_json = json_file.read()

        expected_path = (
            'tests/test_data/gomus/exhibitions/'
            'exhibition_times_expected.csv')
        with open(expected_path, 'r', encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        # Every mocked GET answers with the canned JSON document.
        requests_get_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(raw_json))

        FetchExhibitionTimes().run()

        with times_target.open('r') as result_file:
            actual_csv = result_file.read()
        self.assertEqual(expected_csv, actual_csv)
コード例 #13
0
    def test_empty_tweet_performance(self, output_mock, raw_tweets_mock):
        """Run ``ExtractTweetPerformance`` on the empty raw-tweets fixture.

        The task output must equal the content of
        ``empty_tweet_performance.csv``.
        """
        performance_target = MockTarget('perform_extracted_out', format=UTF8)
        output_mock.return_value = performance_target

        raw_path = 'tests/test_data/twitter/empty_raw_tweets.csv'
        with open(raw_path, 'r', encoding='utf-8') as raw_file:
            raw_tweets = raw_file.read()

        expected_path = 'tests/test_data/twitter/empty_tweet_performance.csv'
        with open(expected_path, 'r', encoding='utf-8') as expected_file:
            expected_performance = expected_file.read()

        def store_raw_tweets(file):
            return file.write(raw_tweets)

        self.install_mock_target(raw_tweets_mock, store_raw_tweets)

        ExtractTweetPerformance(table='tweet_performance').run()

        with performance_target.open('r') as result_file:
            output = result_file.read()
        self.assertEqual(output, expected_performance)
コード例 #14
0
    def test_extract_tweet_performance(self, output_mock, raw_tweets_mock):
        """Run ``ExtractTweetPerformance`` and compare with the fixture.

        The first line must match exactly. Lines 1 and 2 are compared field
        by field with the last ``;``-separated field (the timestamp) left
        out.
        """
        self.db_connector.execute('''
            INSERT INTO tweet (tweet_id) VALUES
                ('1234567890123456789'),
                ('111111111111111111'),
                ('2222222222222222222')
            ''')
        performance_target = MockTarget('perform_extracted_out', format=UTF8)
        output_mock.return_value = performance_target

        raw_path = 'tests/test_data/twitter/raw_tweets.csv'
        with open(raw_path, 'r', encoding='utf-8') as raw_file:
            raw_tweets = raw_file.read()

        expected_path = (
            'tests/test_data/twitter/expected_tweet_performance.csv')
        with open(expected_path, 'r', encoding='utf-8') as expected_file:
            expected_performance = expected_file.read()

        def store_raw_tweets(file):
            return file.write(raw_tweets)

        self.install_mock_target(raw_tweets_mock, store_raw_tweets)

        ExtractTweetPerformance(table='tweet_performance').run()

        with performance_target.open('r') as result_file:
            output = result_file.read()

        actual_lines = output.split('\n')
        expected_lines = expected_performance.split('\n')
        self.assertEqual(actual_lines[0], expected_lines[0])
        for row in (1, 2):
            # Drop the trailing field: it holds the timestamp.
            actual_fields = actual_lines[row].split(';')[:-1]
            expected_fields = expected_lines[row].split(';')[:-1]
            self.assertEqual(actual_fields, expected_fields)
コード例 #15
0
    def test_bytes(self):
        """A binary round trip through the target preserves the bytes."""
        binary_target = MockTarget("foo", format=Nop)

        with binary_target.open('wb') as out_stream:
            out_stream.write(b"bar")

        with binary_target.open('rb') as in_stream:
            received = list(in_stream)
        self.assertEqual(received, [b'bar'])
コード例 #16
0
    def test_pagination(self, fact_mock, output_mock, request_mock):
        """Check that ``FetchIgPosts`` issues exactly two requests.

        The mocked request answers first with the content of
        ``post_next.json`` and then with ``post_previous.json``.

        The mock arguments are presumably injected by ``patch`` decorators
        that are outside this view.
        """
        # This is very similar to test_facebook.test_pagination

        fact_target = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = fact_target
        output_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = output_target

        # Read the JSON fixtures as UTF-8 explicitly: the default encoding
        # of open() depends on the platform locale.
        with open(f'{IG_TEST_DATA}/post_next.json', 'r',
                  encoding='utf-8') as next_data_in:
            next_data = next_data_in.read()

        with open(f'{IG_TEST_DATA}/post_previous.json', 'r',
                  encoding='utf-8') as previous_data_in:
            previous_data = previous_data_in.read()

        def next_json():
            return json.loads(next_data)

        def previous_json():
            return json.loads(previous_data)

        next_response = MagicMock(ok=True, json=next_json)
        previous_response = MagicMock(ok=True, json=previous_json)

        # side_effect hands out one response per call, in this order.
        request_mock.side_effect = [next_response, previous_response]

        self.run_task(instagram.FetchIgPosts())

        self.assertEqual(request_mock.call_count, 2)
コード例 #17
0
ファイル: test_info.py プロジェクト: stjordanis/gokart
class TestInfo(unittest.TestCase):
    """Tests for ``gokart.info.make_tree_info`` with mocked local targets."""

    def setUp(self) -> None:
        # Clear the mock file system before each test.
        MockFileSystem().clear()

    @patch('luigi.LocalTarget',
           new=lambda path, **kwargs: MockTarget(path, **kwargs))
    def test_make_tree_info_pending(self):
        """Before anything runs, both tasks are reported as PENDING."""
        root_task = _Task(param=1, sub=_SubTask(param=2))

        pattern = r"""
└─-\(PENDING\) _Task\[[a-z0-9]*\]
   └─-\(PENDING\) _SubTask\[[a-z0-9]*\]"""
        rendered = gokart.info.make_tree_info(root_task)
        self.assertRegex(rendered, pattern)

    @patch('luigi.LocalTarget',
           new=lambda path, **kwargs: MockTarget(path, **kwargs))
    def test_make_tree_info_complete(self):
        """After ``luigi.build``, both tasks are reported as COMPLETE."""
        root_task = _Task(param=1, sub=_SubTask(param=2))

        pattern = r"""
└─-\(COMPLETE\) _Task\[[a-z0-9]*\]
   └─-\(COMPLETE\) _SubTask\[[a-z0-9]*\]"""
        luigi.build([root_task], local_scheduler=True)
        rendered = gokart.info.make_tree_info(root_task)
        self.assertRegex(rendered, pattern)
コード例 #18
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
    def test_with(self):
        """Use the target's file objects as context managers for a round trip."""
        mock_target = MockTarget("foo")

        with mock_target.open('w') as out_stream:
            out_stream.write("bar")

        with mock_target.open('r') as in_stream:
            received = list(in_stream)
        self.assertEqual(received, ['bar'])
コード例 #19
0
    def test_fetch_total_profile_metrics(self, fact_mock, output_mock,
                                         request_mock):
        """Run ``FetchIgTotalProfileMetrics`` on canned JSON, compare the CSV.

        The mock arguments are presumably injected by ``patch`` decorators
        that are outside this view.
        """
        facts = MockTarget('facts_in', format=UTF8)
        fact_mock.return_value = facts
        result_target = MockTarget('post_out', format=UTF8)
        output_mock.return_value = result_target

        actual_path = f'{IG_TEST_DATA}/total_profile_metrics_actual.json'
        with open(actual_path, 'r', encoding='utf-8') as json_file:
            raw_json = json_file.read()

        expected_path = f'{IG_TEST_DATA}/total_profile_metrics_expected.csv'
        with open(expected_path, 'r', encoding='utf-8') as csv_file:
            expected_csv = csv_file.read()

        # Every mocked request answers with the canned JSON document.
        request_mock.return_value = MagicMock(
            ok=True, json=lambda: json.loads(raw_json))

        with freeze_time('2020-01-01 00:00:05'):
            column_names = [
                col[0]
                for col in instagram.IgTotalProfileMetricsToDb().columns
            ]
            fetch_task = instagram.FetchIgTotalProfileMetrics(
                columns=column_names)
            self.run_task(fetch_task)

        with result_target.open('r') as result_file:
            actual_csv = result_file.read()
        self.assertEqual(actual_csv, expected_csv)
コード例 #20
0
    def test_default_mode_value(self):
        """Reading through ``open()`` with no mode returns the written text."""
        mock_target = MockTarget("foo")

        with mock_target.open('w') as out_stream:
            out_stream.write("bar")

        # The mode is deliberately omitted on the reading side.
        with mock_target.open() as in_stream:
            received = list(in_stream)
        self.assertEqual(received, ['bar'])
コード例 #21
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
    def test_1(self):
        """Round trip using ``print`` and explicit ``close()`` calls."""
        target = MockTarget('test')

        # Write via print(), which appends the trailing newline.
        writer = target.open('w')
        print('test', file=writer)
        writer.close()

        reader = target.open('r')
        self.assertEqual(list(reader), ['test\n'])
        reader.close()
コード例 #22
0
ファイル: range_test.py プロジェクト: xuezhizeng/luigi
 def output(self):
     """Return one target for even hours, a dict of two targets otherwise.

     All paths start with ``self.dh`` formatted as ``/even/%Y%m%d%H``.
     """
     base = self.dh.strftime('/even/%Y%m%d%H')
     if self.dh.hour % 2 != 0:
         return {
             'spi': MockTarget(base + '/something.spi'),
             'spl': MockTarget(base + '/something.spl'),
         }
     return MockTarget(base)
コード例 #23
0
    def test_1(self):
        """Write one line with ``print`` and read it back without ``with``."""
        mock_target = MockTarget('test')

        out_stream = mock_target.open('w')
        print('test', file=out_stream)
        out_stream.close()

        # print() added the newline, so it shows up in the line read back.
        in_stream = mock_target.open('r')
        self.assertEqual(list(in_stream), ['test\n'])
        in_stream.close()
コード例 #24
0
    def test_app_id(self, input_mock):
        """``FetchGplayReviews().app_id`` returns the id stored in the facts."""

        facts = {'ids': {'gplay': {'appId': 'some ID'}}}
        facts_target = MockTarget('museum_facts', format=UTF8)
        input_mock.return_value = facts_target
        with facts_target.open('w') as fp:
            json.dump(facts, fp)
        # Reset the attribute on the fixture task before reading the id.
        self.task._app_id = None

        self.assertEqual(FetchGplayReviews().app_id, 'some ID')
コード例 #25
0
    def test_DownloadBotTemplate(self):
        """Building the task makes its (mocked) output exist."""
        # In-memory stand-in for the task's real output target.
        template_target = MockTarget("DownloadBotTemplate", format=format.Nop)

        # Subclass whose output() hands back the mock target.
        class MockDownloadBotTemplate(DownloadBotTemplate):
            def output(self):
                return template_target

        # The target must not exist until the task has been built.
        self.assertFalse(template_target.exists())
        build([MockDownloadBotTemplate()], local_scheduler=True)
        self.assertTrue(template_target.exists())
コード例 #26
0
    def test_one(self, input_mock, output_mock):
        """With a single CSV input, ``ConcatCsvs`` outputs the same frame."""

        df_in = pd.DataFrame([[1, 'foo'], [2, 'bar']], columns=['a', 'b'])

        def store_frame(file):
            return df_in.to_csv(file, index=False)

        self.install_mock_target(input_mock, store_frame)
        result_target = MockTarget(str(self))
        output_mock.return_value = result_target

        self.task = ConcatCsvs()
        self.run_task(self.task)
        with result_target.open('r') as result_file:
            df_out = pd.read_csv(result_file)

        pd.testing.assert_frame_equal(df_in, df_out)
コード例 #27
0
    def test_deterministic(self):
        """``DataDump.latest`` keeps returning the dump it resolved first.

        A target dated 2012-01-01 is created and ``latest`` is asked for
        2012-01-10. A newer target (2012-01-05) is then created and
        ``latest`` is asked again with the same values, passed as keyword
        arguments in a different order. Both calls must report 2012-01-01.
        """
        MockTarget('data-pqr-zebra-Congo-2012-01-01').open('w').close()
        d = DataDump.latest(date=datetime.date(2012, 1, 10),
                            param='pqr',
                            a='zebra',
                            aa='Congo')
        # assertEqual replaces the deprecated alias assertEquals, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(d.date, datetime.date(2012, 1, 1))

        MockTarget('data-pqr-zebra-Congo-2012-01-05').open('w').close()
        d = DataDump.latest(date=datetime.date(2012, 1, 10),
                            param='pqr',
                            aa='Congo',
                            a='zebra')
        # Should still be the same
        self.assertEqual(d.date, datetime.date(2012, 1, 1))
コード例 #28
0
class FirstTask(Task):
    """Example task that uses ``b.FirstBatch`` and ``first_task.txt``."""

    # Batch instance used by this task.
    batch = b.FirstBatch()
    # Mock target assigned as this task's target.
    target = MockTarget('first_task.txt')
コード例 #29
0
    def test_DownloadHtml(self):
        """Building the task makes its (mocked) output exist."""
        # In-memory stand-in for the task's real output target.
        html_target = MockTarget("DownloadHTMLTemplate", format=format.Nop)

        # Subclass that overrides output() to return the mock target.
        class MockDownloadHtml(DownloadHTMLTemplate):
            def output(self):
                return html_target

        # Nothing has run yet, so the target must be missing.
        self.assertFalse(html_target.exists())
        build([MockDownloadHtml()], local_scheduler=True)
        # After the build, the target must exist.
        self.assertTrue(html_target.exists())
コード例 #30
0
    def test_thumbnails_to_db(self, output_mock, uri_mock, to_db_mock):
        """Run ``IgPostThumbnailsToDb`` and check the stored thumbnail URIs.

        The posts table is seeded with posts of which the last one has no
        thumbnail. Afterwards every post must carry the URI from the
        fixture, and ``uri_mock`` must have been called once per post that
        started without one.
        """
        thumbnails = pd.read_csv(f'{IG_TEST_DATA}/post_thumbnails.csv')
        post_data = pd.read_csv(f'{IG_TEST_DATA}/post_expected.csv')

        # Seed the database: copy the known URIs, then blank the last row.
        merged = thumbnails.merge(post_data, on='permalink')
        post_data['thumbnail_uri'] = merged['thumbnail_uri']
        is_last_row = post_data.index == len(post_data) - 1
        post_data.loc[is_last_row, 'thumbnail_uri'] = None
        seed_task = DummyWriteCsvToDb(table=instagram.IgPostsToDb.table,
                                      csv=post_data.to_csv(index=False))
        self.run_task(seed_task)

        def lookup_uri(permalink):
            # Answer with the fixture URI recorded for this permalink.
            matching = thumbnails[thumbnails['permalink'] == permalink]
            return matching['thumbnail_uri'].values[0]

        result_target = MockTarget('post_out')
        output_mock.return_value = result_target
        uri_mock.side_effect = lookup_uri
        to_db_mock.return_value = True

        self.run_task(instagram.IgPostThumbnailsToDb())

        rows = self.db_connector.query(f'''
            SELECT permalink, thumbnail_uri
            FROM {instagram.IgPostsToDb.table}  -- # nosec - constant
        ''')
        actual_data = pd.DataFrame(
            rows, columns=['permalink', 'thumbnail_uri'])
        expected_data = thumbnails[['permalink', 'thumbnail_uri']]
        pd.testing.assert_frame_equal(actual_data, expected_data)

        missing_before = post_data['thumbnail_uri'].isna().sum()
        self.assertEqual(uri_mock.call_count, missing_before)
コード例 #31
0
    def prepare_input_target(self, input_mock, infile):
        """Make ``input_mock`` return an iterator over one filled target.

        The target is filled through ``self.write_file_to_target`` with
        ``infile``.
        """
        data_target = MockTarget('data_in', format=UTF8)

        # Most tasks expect an iterable here, as FetchGomusReport returns.
        targets = iter([data_target])
        input_mock.return_value = targets

        self.write_file_to_target(data_target, infile)
コード例 #32
0
class Task(luigi.Task):
    """
    Base class that the concrete batch tasks derive from.
    """

    task_namespace = 'batch'

    # Class-level defaults; FirstTask overrides `batch` and `target`.
    required_task = None
    batch = Batch
    target = MockTarget("task.txt")

    def requires(self):
        """
        Returns:
            Whatever is assigned to `required_task` (`None` by default).
        """
        return self.required_task

    def run(self):
        """
        Runs the batch, then writes a greeting to the output target.
        """

        self.batch.run()
        greeting = "{task} says: Hello world!".format(
            task=self.__class__.__name__)
        with self.output().open('w') as output:
            output.write(greeting)

    def output(self):
        """
        Returns:
            The object assigned to `target`.
        """

        return self.target
コード例 #33
0
    def test_download_image(self):
        """Template test: run a task subclass and expect its mock output.

        NOTE(review): the ``...`` values look like placeholders that still
        have to be filled in - confirm before relying on this test.
        """
        with TemporaryDirectory() as scratch_dir:
            placeholder_name = "asdf.jpg"

            # TODO from the template: upload a fake image file to the bucket.

            mock_output = MockTarget("...")

            # Subclass meant to override output() with a mock target.
            class MockDownloadImage(MyPSET4Task):
                def output(self):
                    return ...

            task = MockDownloadImage(filename="...")
            run_task(task)

            self.assertTrue(mock_output.exists())
コード例 #34
0
ファイル: ra2mr.py プロジェクト: miniHive/assignment
 def get_output(self, fn):
     """Return the target for ``fn`` that matches ``self.exec_environment``.

     HDFS gives an ``HdfsTarget``, MOCK gives a ``MockTarget``, and any
     other value gives a ``luigi.LocalTarget``.
     """
     if self.exec_environment == ExecEnv.HDFS:
         return luigi.contrib.hdfs.HdfsTarget(fn)
     if self.exec_environment == ExecEnv.MOCK:
         return MockTarget(fn)
     return luigi.LocalTarget(fn)
コード例 #35
0
ファイル: hadoop_test.py プロジェクト: ystopia/luigi
 def get_output(self, fn):
     """Return an HDFS target under ``/tmp/`` or a ``MockTarget`` for ``fn``.

     ``self.use_hdfs`` selects the HDFS variant.
     """
     if not self.use_hdfs:
         return MockTarget(fn)

     hdfs_path = '/tmp/' + fn
     hdfs_format = luigi.format.get_default_format() >> luigi.hdfs.PlainDir
     return luigi.hdfs.HdfsTarget(hdfs_path, format=hdfs_format)
コード例 #36
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
 def test_mode_none_error(self):
     """Using the target with mode ``None`` must raise ``TypeError``."""
     target = MockTarget("foo")
     # assertRaises is entered first, so it also covers the open() call.
     with self.assertRaises(TypeError), target.open(None) as writer:
         writer.write("bar")
コード例 #37
0
ファイル: mock_test.py プロジェクト: PeteW/luigi
 def _touch(self, path):
     """Open a ``MockTarget`` at ``path`` for writing and write nothing."""
     with MockTarget(path).open('w'):
         pass