def test_no_overwrite(self):
     """Check that complete() follows the patched HivePartitionTarget's exists() result."""
     target_path = 'edx.analytics.tasks.database_imports.HivePartitionTarget'
     hive_task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     with patch(target_path) as target_cls:
         fake_output = target_cls()
         # Make MagicMock act more like a regular mock, so that flatten() does the right thing.
         for magic_name in ('__iter__', '__getitem__'):
             delattr(fake_output, magic_name)

         # Round one: the output reports that it does not exist.
         absent_stub = Mock(return_value=False)
         fake_output.exists = absent_stub
         self.assertFalse(hive_task.complete())
         self.assertTrue(absent_stub.called)

         # Round two: the output reports that it exists.
         present_stub = Mock(return_value=True)
         fake_output.exists = present_stub
         self.assertTrue(hive_task.complete())
         self.assertTrue(present_stub.called)
 def test_no_overwrite(self):
     """Check that complete() is falsy, then truthy, as the patched target's exists() flips."""
     hive_task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     with patch('edx.analytics.tasks.database_imports.HivePartitionTarget') as target_cls:
         fake_output = target_cls()
         # Make MagicMock act more like a regular mock, so that flatten() does the right thing.
         del fake_output.__iter__
         del fake_output.__getitem__
         # Each round installs a fresh exists() stub and pairs it with the matching assertion.
         rounds = (
             (False, self.assertFalse),
             (True, self.assertTrue),
         )
         for exists_value, check_complete in rounds:
             fake_output.exists = Mock(return_value=exists_value)
             check_complete(hive_task.complete())
             self.assertTrue(fake_output.exists.called)
 def test_query_with_date_interval(self):
     """Check that query() yields the expected Hive script for the kwargs from _get_kwargs().

     The expected script drops and recreates the external ``last_country_of_user``
     table located at ``s3://output/path`` and adds the ``dt = '2014-01-01'``
     partition.
     """
     task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     query = task.query()
     # The literal is deliberately not raw: the field-terminator escape below
     # becomes a real tab character in the expected text.
     expected_query = textwrap.dedent("""
         USE default;
         DROP TABLE IF EXISTS last_country_of_user;
         CREATE EXTERNAL TABLE last_country_of_user (
             country_name STRING,country_code STRING,username STRING
         )
         PARTITIONED BY (dt STRING)
         ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
         LOCATION 's3://output/path';
         ALTER TABLE last_country_of_user ADD PARTITION (dt = '2014-01-01');
         """)
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(query, expected_query)
 def test_query_with_date_interval(self):
     """Check that query() yields the expected Hive script for the kwargs from _get_kwargs().

     The expected script drops and recreates the external ``last_country_of_user``
     table located at ``s3://output/path`` and adds the ``dt = '2014-01-01'``
     partition.
     """
     task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     query = task.query()
     # The literal is deliberately not raw: the field-terminator escape below
     # becomes a real tab character in the expected text.
     expected_query = textwrap.dedent(
         """
         USE default;
         DROP TABLE IF EXISTS last_country_of_user;
         CREATE EXTERNAL TABLE last_country_of_user (
             country_name STRING,country_code STRING,username STRING
         )
         PARTITIONED BY (dt STRING)
         ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
         LOCATION 's3://output/path';
         ALTER TABLE last_country_of_user ADD PARTITION (dt = '2014-01-01');
         """
     )
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(query, expected_query)
 def requires(self):
     """Return an ImportLastCountryOfUserToHiveTask configured from this task's own settings."""
     # Forward each setting unchanged under the same keyword name.
     upstream_kwargs = {
         'mapreduce_engine': self.mapreduce_engine,
         'n_reduce_tasks': self.n_reduce_tasks,
         'source': self.source,
         'interval': self.interval,
         'pattern': self.pattern,
         'geolocation_data': self.geolocation_data,
         'overwrite': self.overwrite,
     }
     return ImportLastCountryOfUserToHiveTask(**upstream_kwargs)
 def requires(self):
     """
     Return the tasks that load the Hive tables this task reads.

     This task reads from auth_user, auth_user_profile, and last_country_of_user, so each of
     them must be loaded into Hive first (via MySQL loads into Hive or via the pipeline as needed).
     """
     auth_user_import = ImportAuthUserTask(
         overwrite=self.overwrite,
         destination=self.warehouse_path,
     )
     auth_user_profile_import = ImportAuthUserProfileTask(
         overwrite=self.overwrite,
         destination=self.warehouse_path,
     )
     last_country_import = ImportLastCountryOfUserToHiveTask(
         overwrite=self.overwrite,
         interval=self.interval,
         user_country_output=self.user_country_output,
         n_reduce_tasks=self.n_reduce_tasks,
     )
     return [auth_user_import, auth_user_profile_import, last_country_import]
 def test_requires(self):
     """Check the output path of the task returned by requires().

     With the kwargs from _get_kwargs(), the required task's output is expected
     at the ``dt=2014-01-01`` partition under ``s3://output/path``.
     """
     task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     required_task = task.requires()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(required_task.output().path, 's3://output/path/dt=2014-01-01')
 def test_overwrite(self):
     """Check that a task built with overwrite=True reports itself as not complete."""
     params = self._get_kwargs()
     params.update(overwrite=True)
     is_complete = ImportLastCountryOfUserToHiveTask(**params).complete()
     self.assertFalse(is_complete)
 def test_requires(self):
     """Check the output path of the task returned by requires().

     With the kwargs from _get_kwargs(), the required task's output is expected
     at the ``dt=2014-01-01`` partition under ``s3://output/path``.
     """
     task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     required_task = task.requires()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(required_task.output().path,
                      's3://output/path/dt=2014-01-01')
 def test_overwrite(self):
     """Check that complete() is falsy once the overwrite flag is switched on."""
     task_kwargs = self._get_kwargs()
     # Turn on overwrite in the kwargs used to build the task.
     task_kwargs.update({'overwrite': True})
     overwriting_task = ImportLastCountryOfUserToHiveTask(**task_kwargs)
     self.assertFalse(overwriting_task.complete())
Esempio n. 11
0
 def test_requires(self):
     """Check the output path of the task returned by requires().

     With the kwargs from _get_kwargs(), the required task's output is expected
     at the ``dt=2014-01-01`` partition of ``last_country_of_user`` under
     ``s3://fake/warehouse``.
     """
     task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
     required_task = task.requires()
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(
         required_task.output().path,
         's3://fake/warehouse/last_country_of_user/dt=2014-01-01')