def __init__(self, pipeline_name, raw_data_sources, phone_number_uuid_table, timestamp_remappings,
             rapid_pro_key_remappings, project_start_date, project_end_date, filter_test_messages,
             move_ws_messages, memory_profile_upload_bucket, data_archive_upload_bucket, bucket_dir_path,
             drive_upload=None):
    """
    :param pipeline_name: The name of this pipeline.
    :type pipeline_name: str
    :param raw_data_sources: List of sources to pull the various raw run files from.
    :type raw_data_sources: list of RawDataSource
    :param phone_number_uuid_table: Configuration for the Firestore phone number <-> uuid table.
    :type phone_number_uuid_table: PhoneNumberUuidTable
    :param timestamp_remappings: List of remappings to apply to the timestamps of the raw messages.
    :type timestamp_remappings: list of TimestampRemapping
    :param rapid_pro_key_remappings: List of rapid_pro_key -> pipeline_key remappings.
    :type rapid_pro_key_remappings: list of RapidProKeyRemapping
    :param project_start_date: When data collection started - all activation messages received before this
                               date time will be dropped.
    :type project_start_date: datetime.datetime
    :param project_end_date: When data collection stopped - all activation messages received on or after this
                             date time will be dropped.
    :type project_end_date: datetime.datetime
    :param filter_test_messages: Whether to filter out messages sent from the rapid_pro_test_contact_uuids.
    :type filter_test_messages: bool
    :param move_ws_messages: Whether to move messages labelled as Wrong Scheme to the correct dataset.
    :type move_ws_messages: bool
    :param memory_profile_upload_bucket: The GS bucket name to upload the memory profile log to.
                                         This name will be appended with the bucket_dir_path and the file
                                         basename to generate the log upload location.
    :type memory_profile_upload_bucket: str
    :param data_archive_upload_bucket: The GS bucket name to upload the data archive file to.
                                       This name will be appended with the bucket_dir_path and the file
                                       basename to generate the archive upload location.
    :type data_archive_upload_bucket: str
    :param bucket_dir_path: The GS bucket folder path to store the data archive & memory log files to.
    :type bucket_dir_path: str
    :param drive_upload: Configuration for uploading to Google Drive, or None.
                         If None, does not upload to Google Drive.
    :type drive_upload: DriveUploadPaths | None
    """
    self.pipeline_name = pipeline_name
    self.raw_data_sources = raw_data_sources
    self.phone_number_uuid_table = phone_number_uuid_table
    self.timestamp_remappings = timestamp_remappings
    self.rapid_pro_key_remappings = rapid_pro_key_remappings
    self.project_start_date = project_start_date
    self.project_end_date = project_end_date
    self.filter_test_messages = filter_test_messages
    self.move_ws_messages = move_ws_messages
    self.drive_upload = drive_upload
    self.memory_profile_upload_bucket = memory_profile_upload_bucket
    self.data_archive_upload_bucket = data_archive_upload_bucket
    self.bucket_dir_path = bucket_dir_path

    PipelineConfiguration.RQA_CODING_PLANS = coding_plans.get_rqa_coding_plans(self.pipeline_name)
    PipelineConfiguration.DEMOG_CODING_PLANS = coding_plans.get_demog_coding_plans(self.pipeline_name)
    PipelineConfiguration.FOLLOW_UP_CODING_PLANS = coding_plans.get_follow_up_coding_plans(self.pipeline_name)
    PipelineConfiguration.SURVEY_CODING_PLANS += PipelineConfiguration.DEMOG_CODING_PLANS
    PipelineConfiguration.SURVEY_CODING_PLANS += PipelineConfiguration.FOLLOW_UP_CODING_PLANS
    PipelineConfiguration.WS_CORRECT_DATASET_SCHEME = coding_plans.get_ws_correct_dataset_scheme(self.pipeline_name)

    self.validate()
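
# Usage sketch (illustrative only): one way the bucket-based constructor above might be called.
# The pipeline name, dates, bucket names and directory path below are invented placeholders, and
# raw_data_sources, phone_number_uuid_table and rapid_pro_key_remappings are assumed to have been
# built elsewhere from their respective configuration classes.
from datetime import datetime, timezone

pipeline_configuration = PipelineConfiguration(
    pipeline_name="example_pipeline",
    raw_data_sources=raw_data_sources,
    phone_number_uuid_table=phone_number_uuid_table,
    timestamp_remappings=[],
    rapid_pro_key_remappings=rapid_pro_key_remappings,
    project_start_date=datetime(2020, 1, 1, tzinfo=timezone.utc),
    project_end_date=datetime(2020, 4, 1, tzinfo=timezone.utc),
    filter_test_messages=True,
    move_ws_messages=True,
    memory_profile_upload_bucket="example-memory-profile-uploads",
    data_archive_upload_bucket="example-data-archive-uploads",
    bucket_dir_path="example_pipeline/logs",
    drive_upload=None
)
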
def __init__(self, pipeline_name, raw_data_sources, phone_number_uuid_table, timestamp_remappings,
             rapid_pro_key_remappings, project_start_date, project_end_date, filter_test_messages,
             move_ws_messages, memory_profile_upload_url_prefix, data_archive_upload_url_prefix,
             drive_upload=None):
    """
    :param pipeline_name: The name of this pipeline.
    :type pipeline_name: str
    :param raw_data_sources: List of sources to pull the various raw run files from.
    :type raw_data_sources: list of RawDataSource
    :param phone_number_uuid_table: Configuration for the Firestore phone number <-> uuid table.
    :type phone_number_uuid_table: PhoneNumberUuidTable
    :param timestamp_remappings: List of remappings to apply to the timestamps of the raw messages.
    :type timestamp_remappings: list of TimestampRemapping
    :param rapid_pro_key_remappings: List of rapid_pro_key -> pipeline_key remappings.
    :type rapid_pro_key_remappings: list of RapidProKeyRemapping
    :param project_start_date: When data collection started - all activation messages received before this
                               date time will be dropped.
    :type project_start_date: datetime.datetime
    :param project_end_date: When data collection stopped - all activation messages received on or after this
                             date time will be dropped.
    :type project_end_date: datetime.datetime
    :param filter_test_messages: Whether to filter out messages sent from the rapid_pro_test_contact_uuids.
    :type filter_test_messages: bool
    :param move_ws_messages: Whether to move messages labelled as Wrong Scheme to the correct dataset.
    :type move_ws_messages: bool
    :param memory_profile_upload_url_prefix: The prefix of the GS URL to upload the memory profile log to.
                                             This prefix will be appended by the id of the pipeline run
                                             (provided as a command line argument), and the ".profile" file
                                             extension.
    :type memory_profile_upload_url_prefix: str
    :param data_archive_upload_url_prefix: The prefix of the GS URL to upload the data archive file to.
                                           This prefix will be appended by the id of the pipeline run
                                           (provided as a command line argument), and the archive's file
                                           extension.
    :type data_archive_upload_url_prefix: str
    :param drive_upload: Configuration for uploading to Google Drive, or None.
                         If None, does not upload to Google Drive.
    :type drive_upload: DriveUploadPaths | None
    """
    self.pipeline_name = pipeline_name
    self.raw_data_sources = raw_data_sources
    self.phone_number_uuid_table = phone_number_uuid_table
    self.timestamp_remappings = timestamp_remappings
    self.rapid_pro_key_remappings = rapid_pro_key_remappings
    self.project_start_date = project_start_date
    self.project_end_date = project_end_date
    self.filter_test_messages = filter_test_messages
    self.move_ws_messages = move_ws_messages
    self.drive_upload = drive_upload
    self.memory_profile_upload_url_prefix = memory_profile_upload_url_prefix
    self.data_archive_upload_url_prefix = data_archive_upload_url_prefix

    PipelineConfiguration.RQA_CODING_PLANS = coding_plans.get_rqa_coding_plans(self.pipeline_name)
    PipelineConfiguration.SURVEY_CODING_PLANS = coding_plans.get_survey_coding_plans(self.pipeline_name)

    # TODO: This is a temporary COVID19-specific hack to extract demographics out of the surveys, so that the
    #       automated analysis can analyse demographics only. Not fixing this properly here for now, because we
    #       may instead decide to analyse all of the survey questions in the same way as we do the demographics.
    PipelineConfiguration.DEMOG_CODING_PLANS = coding_plans.get_survey_coding_plans(self.pipeline_name)[0:3]

    PipelineConfiguration.WS_CORRECT_DATASET_SCHEME = coding_plans.get_ws_correct_dataset_scheme(self.pipeline_name)

    self.validate()
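
# Usage sketch (illustrative only): one way the URL-prefix-based constructor above might be called.
# All literal values below are invented placeholders; raw_data_sources, phone_number_uuid_table and
# rapid_pro_key_remappings are assumed to have been built elsewhere from their respective
# configuration classes.
from datetime import datetime, timezone

pipeline_configuration = PipelineConfiguration(
    pipeline_name="example_pipeline",
    raw_data_sources=raw_data_sources,
    phone_number_uuid_table=phone_number_uuid_table,
    timestamp_remappings=[],
    rapid_pro_key_remappings=rapid_pro_key_remappings,
    project_start_date=datetime(2020, 1, 1, tzinfo=timezone.utc),
    project_end_date=datetime(2020, 4, 1, tzinfo=timezone.utc),
    filter_test_messages=True,
    move_ws_messages=True,
    memory_profile_upload_url_prefix="gs://example-memory-profile-uploads/example_pipeline/memory-",
    data_archive_upload_url_prefix="gs://example-data-archive-uploads/example_pipeline/archive-",
    drive_upload=None
)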