コード例 #1
0
 def test_no_overwrite(self):
     """With no kwargs supplied, complete() is expected to mirror the output target's exists()."""
     # No parameters are passed on purpose; the original author's note suggests this
     # corresponds to overwrite=False (not verified from this test alone).
     task = ImportStudentCourseEnrollmentTask()
     target_path = 'edx.analytics.tasks.database_imports.HivePartitionTarget'
     with patch(target_path) as mock_target:
         output = mock_target()
         # Remove the container protocol from the MagicMock so it behaves like a
         # regular mock and flatten() does the right thing.
         del output.__iter__
         del output.__getitem__
         # First the target is reported missing, then present; each stage uses a
         # fresh Mock so the `called` check cannot be satisfied by the other stage.
         for target_exists, check in ((False, self.assertFalse), (True, self.assertTrue)):
             output.exists = Mock(return_value=target_exists)
             check(task.complete())
             self.assertTrue(output.exists.called)
コード例 #2
0
 def test_no_overwrite(self):
     """Check that complete() follows the patched output target's exists() when no kwargs are given."""
     # Built without any explicit parameters (the original note mentions
     # {'overwrite': False} as the spelled-out alternative).
     task = ImportStudentCourseEnrollmentTask()
     patched_name = 'edx.analytics.tasks.database_imports.HivePartitionTarget'
     with patch(patched_name) as mock_target:
         output = mock_target()
         # Make MagicMock act more like a regular mock, so that flatten() does the right thing.
         del output.__iter__
         del output.__getitem__

         # Stage 1: the target reports that it does not exist.
         reports_missing = Mock(return_value=False)
         output.exists = reports_missing
         self.assertFalse(task.complete())
         self.assertTrue(reports_missing.called)

         # Stage 2: a fresh stub reports that the target exists.
         reports_present = Mock(return_value=True)
         output.exists = reports_present
         self.assertTrue(task.complete())
         self.assertTrue(reports_present.called)
コード例 #3
0
 def requires(self):
     """Yield a single tuple containing the four upstream tasks of this task."""
     # Import parameters not passed here ('destination', 'num_mappers', 'verbose'
     # and 'date') are left at their default values.
     db_import_params = dict(overwrite=self.overwrite)

     # The Hive import and the MySQL insert are configured with the same arguments.
     country_params = dict(
         mapreduce_engine=self.mapreduce_engine,
         n_reduce_tasks=self.n_reduce_tasks,
         source=self.source,
         interval=self.interval,
         pattern=self.pattern,
         geolocation_data=self.geolocation_data,
         overwrite=self.overwrite,
         user_country_output=self.user_country_output,
     )

     yield (
         ImportLastCountryOfUserToHiveTask(**country_params),
         InsertToMysqlLastCountryOfUserTask(**country_params),
         # We can't make explicit dependencies on this yet, until we
         # solve the multiple-credentials problem, as well as the split-kwargs
         # problem.
         ImportStudentCourseEnrollmentTask(**db_import_params),
         ImportAuthUserTask(**db_import_params),
     )
コード例 #4
0
    def test_query_with_date(self):
        """Check the text returned by query() when an explicit import_date is supplied."""
        # Same value the strptime('2014-07-01', '%Y-%m-%d').date() round-trip produced.
        import_date = datetime.date(2014, 7, 1)
        task = ImportStudentCourseEnrollmentTask(import_date=import_date)
        query = task.query()
        expected_query = textwrap.dedent(
            """
            USE default;
            DROP TABLE IF EXISTS student_courseenrollment;
            CREATE EXTERNAL TABLE student_courseenrollment (
                id INT,user_id INT,course_id STRING,created TIMESTAMP,is_active BOOLEAN,mode STRING
            )
            PARTITIONED BY (dt STRING)

            LOCATION 's3://foo/bar/student_courseenrollment';
            ALTER TABLE student_courseenrollment ADD PARTITION (dt = '2014-07-01');
            """
        )
        # assertEquals is a deprecated alias that was removed in Python 3.12;
        # assertEqual is the supported spelling on every Python version.
        self.assertEqual(query, expected_query)
コード例 #5
0
    def test_query_with_date(self):
        """Verify that query() yields the expected statement text for a fixed import_date."""
        kwargs = {
            # Equivalent to parsing '2014-07-01' with strptime and taking .date().
            'import_date': datetime.date(2014, 7, 1),
        }
        task = ImportStudentCourseEnrollmentTask(**kwargs)
        query = task.query()
        expected_query = textwrap.dedent("""
            USE default;
            DROP TABLE IF EXISTS student_courseenrollment;
            CREATE EXTERNAL TABLE student_courseenrollment (
                id INT,user_id INT,course_id STRING,created TIMESTAMP,is_active BOOLEAN,mode STRING
            )
            PARTITIONED BY (dt STRING)

            LOCATION 's3://foo/bar/student_courseenrollment';
            ALTER TABLE student_courseenrollment ADD PARTITION (dt = '2014-07-01');
            """)
        # Use assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12 and later.
        self.assertEqual(query, expected_query)
コード例 #6
0
 def requires(self):
     """Yield one tuple of the three upstream tasks, each built with this task's import_date."""
     import_date = self.import_date
     course_modes = ImportCourseModeTask(import_date=import_date)
     enrollments = ImportStudentCourseEnrollmentTask(import_date=import_date)
     # Only the reconciliation task additionally receives the reducer count.
     reconciled_orders = ReconciledOrderTransactionTableTask(
         import_date=import_date,
         n_reduce_tasks=self.n_reduce_tasks,
     )
     yield (course_modes, enrollments, reconciled_orders)
コード例 #7
0
 def requires(self):
     """Yield a single tuple holding the partition task and the two database import tasks."""
     # Import parameters not passed here ('destination', 'num_mappers', 'verbose'
     # and 'date') are left at their default values.
     db_import_params = dict(overwrite=self.overwrite)

     # Every argument of the partition task is forwarded unchanged from the
     # attribute of the same name on this task.
     forwarded_names = (
         'mapreduce_engine',
         'n_reduce_tasks',
         'source',
         'pattern',
         'warehouse_path',
         'interval',
         'interval_start',
         'interval_end',
         'overwrite_n_days',
         'geolocation_data',
         'overwrite',
     )
     partition_params = {name: getattr(self, name) for name in forwarded_names}

     yield (
         LastCountryOfUserPartitionTask(**partition_params),
         ImportStudentCourseEnrollmentTask(**db_import_params),
         ImportAuthUserTask(**db_import_params),
     )
コード例 #8
0
 def test_overwrite(self):
     """A task created with overwrite=True is expected to report itself as not complete."""
     task = ImportStudentCourseEnrollmentTask(overwrite=True)
     is_complete = task.complete()
     self.assertFalse(is_complete)
コード例 #9
0
 def requires_hadoop(self):
     """Yield the enrollment import task, built with this task's credentials."""
     # Rather than only pointing at a dump's output directory, depend on the
     # import task so that a dump is guaranteed to exist.
     # There is no way to dump just the MySQL table, so the Hive table
     # definition is handled as part of the same task.
     enrollment_import = ImportStudentCourseEnrollmentTask(credentials=self.credentials)
     yield enrollment_import
コード例 #10
0
 def requires(self):
     """Return the enrollment import task for this task's run_date and warehouse_path."""
     import_params = {
         'import_date': self.run_date,
         'destination': self.warehouse_path,
     }
     return ImportStudentCourseEnrollmentTask(**import_params)
コード例 #11
0
 def test_overwrite(self):
     """Check that complete() is falsy for a task constructed with overwrite=True."""
     overwriting_task = ImportStudentCourseEnrollmentTask(overwrite=True)
     self.assertFalse(overwriting_task.complete())