Ejemplo n.º 1
0
 def requires(self):
     """
     Yield the tasks that must complete before this one runs.

     The first yield is a tuple holding the engagement table task, three
     database import tasks and the course enrollment table task.  A calendar
     table task is yielded afterwards only when ``interval_type`` is "weekly".
     """
     db_import_options = dict(overwrite=self.overwrite)

     # Parameters handed to both the engagement and the enrollment table tasks.
     mapreduce_options = dict(
         mapreduce_engine=self.mapreduce_engine,
         n_reduce_tasks=self.n_reduce_tasks,
         source=self.source,
     )
     engagement_options = dict(
         mapreduce_options,
         interval=self.interval,
         pattern=self.pattern,
         overwrite=self.overwrite,
         interval_type=self.interval_type,
     )
     # Enrollment is calculated from the default start date up to the end of
     # the current interval.  If it has already been calculated, the interval
     # that was used for it is not checked here.
     enrollment_options = dict(
         mapreduce_options,
         interval_end=self.interval.date_b,  # pylint: disable=no-member
         pattern=self.pattern,
         overwrite=self.overwrite,
     )

     yield (
         StudentEngagementTableTask(**engagement_options),
         ImportAuthUserTask(**db_import_options),
         ImportCourseUserGroupTask(**db_import_options),
         ImportCourseUserGroupUsersTask(**db_import_options),
         CourseEnrollmentTableTask(**enrollment_options),
     )

     # The calendar table is only needed by the weekly variant.
     if self.interval_type == "weekly":
         yield CalendarTableTask(warehouse_path=self.warehouse_path)
Ejemplo n.º 2
0
 def requires(self):
     """
     Yield, as a single tuple, the tasks this task depends on.

     The two last-country-of-user tasks are built from the same set of
     parameters; the two database imports only receive ``overwrite``.
     """
     # The import parameters 'destination', 'num_mappers', 'verbose' and 'date'
     # are left out on purpose so that their default values are used.
     db_import_options = dict(overwrite=self.overwrite)

     last_country_options = dict(
         mapreduce_engine=self.mapreduce_engine,
         n_reduce_tasks=self.n_reduce_tasks,
         source=self.source,
         interval=self.interval,
         pattern=self.pattern,
         geolocation_data=self.geolocation_data,
         overwrite=self.overwrite,
         user_country_output=self.user_country_output,
     )

     hive_import = ImportLastCountryOfUserToHiveTask(**last_country_options)
     mysql_insert = InsertToMysqlLastCountryOfUserTask(**last_country_options)
     # Explicit dependencies on this are not possible yet: the
     # multiple-credentials problem and the split-kwargs problem both have to
     # be solved first.
     enrollment_import = ImportStudentCourseEnrollmentTask(**db_import_options)
     auth_user_import = ImportAuthUserTask(**db_import_options)

     yield (hive_import, mysql_insert, enrollment_import, auth_user_import)
Ejemplo n.º 3
0
 def requires(self):
     """
     Return the upstream tasks for the tables this task reads.

     Those tables are auth_user, auth_user_profile and last_country_of_user;
     each of them has to be available in Hive before this task runs.
     """
     auth_user = ImportAuthUserTask(
         overwrite=self.overwrite,
         destination=self.warehouse_path,
     )
     auth_user_profile = ImportAuthUserProfileTask(
         overwrite=self.overwrite,
         destination=self.warehouse_path,
     )
     last_country = ExternalLastCountryOfUserToHiveTask(date=self.date)
     return [auth_user, auth_user_profile, last_country]
Ejemplo n.º 4
0
 def requires(self):
     """
     Return the upstream tasks for the tables this task reads.

     Those tables are auth_user and user_activity_daily; both have to be
     available in Hive before this task runs.
     """
     # The auth_user import is always requested with overwrite disabled.
     auth_user = ImportAuthUserTask(
         overwrite=False,
         destination=self.warehouse_path,
     )
     user_activity = UserActivityTableTask(
         interval=self.interval,
         warehouse_path=self.warehouse_path,
         n_reduce_tasks=self.n_reduce_tasks,
     )
     return [auth_user, user_activity]
Ejemplo n.º 5
0
    def requires(self):
        """
        Yield two tuples of import tasks.

        The first tuple is built with ``otto_credentials`` / ``otto_database``,
        the second with ``credentials`` / ``database``.  Every other import
        parameter is identical for both tuples.
        """
        otto_options = dict(
            destination=self.destination,
            num_mappers=self.num_mappers,
            verbose=self.verbose,
            import_date=self.import_date,
            overwrite=self.overwrite,
            credentials=self.otto_credentials,
            database=self.otto_database,
        )
        otto_imports = (
            # Otto user table.
            ImportEcommerceUser,
            # Otto product tables.
            ImportProductCatalog,
            ImportProductCatalogClass,
            ImportProductCatalogAttributes,
            ImportProductCatalogAttributeValues,
            # Otto current state, line item, and coupon tables.
            ImportCurrentOrderState,
            ImportCurrentOrderLineState,
            ImportCurrentOrderDiscountState,
            ImportCouponVoucherIndirectionState,
            ImportCouponVoucherState,
            # Otto refund tables.
            ImportCurrentRefundRefundLineState,
            # Otto partner information.
            ImportEcommercePartner,
        )
        yield tuple(task_class(**otto_options) for task_class in otto_imports)

        # Same options as above, except for the credentials and the database.
        lms_options = dict(
            otto_options,
            credentials=self.credentials,
            database=self.database,
        )
        lms_imports = (
            # Shopping cart tables.
            ImportShoppingCartOrder,
            ImportShoppingCartOrderItem,
            ImportShoppingCartCertificateItem,
            ImportShoppingCartPaidCourseRegistration,
            ImportShoppingCartDonation,
            ImportShoppingCartCourseRegistrationCodeItem,
            ImportShoppingCartCoupon,
            ImportShoppingCartCouponRedemption,
            # Other LMS tables.
            ImportAuthUserTask,
        )
        yield tuple(task_class(**lms_options) for task_class in lms_imports)
 def requires(self):
     """
     Return the upstream tasks for the tables this task reads.

     Those tables are auth_user, auth_user_profile and last_country_of_user;
     each of them has to be available in Hive before this task runs.
     """
     auth_user = ImportAuthUserTask(
         overwrite=self.overwrite,
         destination=self.warehouse_path,
     )
     auth_user_profile = ImportAuthUserProfileTask(
         overwrite=self.overwrite,
         destination=self.warehouse_path,
     )
     last_country = ImportLastCountryOfUserToHiveTask(
         overwrite=self.overwrite,
         interval=self.interval,
         user_country_output=self.user_country_output,
         n_reduce_tasks=self.n_reduce_tasks,
     )
     return [auth_user, auth_user_profile, last_country]
 def requires(self):
     """
     Yield, as a single tuple, the tasks this task depends on.

     The tuple holds the last-country-of-user partition task followed by the
     student course enrollment import and the auth user import.
     """
     # The import parameters 'destination', 'num_mappers', 'verbose' and 'date'
     # are left out on purpose so that their default values are used.
     db_import_options = dict(overwrite=self.overwrite)

     partition_options = dict(
         mapreduce_engine=self.mapreduce_engine,
         n_reduce_tasks=self.n_reduce_tasks,
         source=self.source,
         pattern=self.pattern,
         warehouse_path=self.warehouse_path,
         interval=self.interval,
         interval_start=self.interval_start,
         interval_end=self.interval_end,
         overwrite_n_days=self.overwrite_n_days,
         geolocation_data=self.geolocation_data,
         overwrite=self.overwrite,
     )

     last_country_partition = LastCountryOfUserPartitionTask(**partition_options)
     enrollment_import = ImportStudentCourseEnrollmentTask(**db_import_options)
     auth_user_import = ImportAuthUserTask(**db_import_options)

     yield (last_country_partition, enrollment_import, auth_user_import)