    def __init__(self, pipeline_name, raw_data_sources, phone_number_uuid_table, timestamp_remappings,
                 rapid_pro_key_remappings, project_start_date, project_end_date, filter_test_messages, move_ws_messages,
                 memory_profile_upload_bucket, data_archive_upload_bucket, bucket_dir_path, drive_upload=None):
        """
        :param pipeline_name: The name of this pipeline.
        :type pipeline_name: str
        :param raw_data_sources: List of sources to pull the various raw run files from.
        :type raw_data_sources: list of RawDataSource
        :param phone_number_uuid_table: Configuration for the Firestore phone number <-> uuid table.
        :type phone_number_uuid_table: PhoneNumberUuidTable
        :param timestamp_remappings: List of remappings to apply to message timestamps.
        :type timestamp_remappings: list of TimestampRemapping
        :param rapid_pro_key_remappings: List of rapid_pro_key -> pipeline_key remappings.
        :type rapid_pro_key_remappings: list of RapidProKeyRemapping
        :param project_start_date: When data collection started - all activation messages received before this
                                   datetime will be dropped.
        :type project_start_date: datetime.datetime
        :param project_end_date: When data collection stopped - all activation messages received on or after this
                                 datetime will be dropped.
        :type project_end_date: datetime.datetime
        :param filter_test_messages: Whether to filter out messages sent from the rapid_pro_test_contact_uuids.
        :type filter_test_messages: bool
        :param move_ws_messages: Whether to move messages labelled as Wrong Scheme to the correct dataset.
        :type move_ws_messages: bool
        :param memory_profile_upload_bucket: The GS bucket name to upload the memory profile log to.
                                              This name will be appended with the bucket_dir_path
                                              and the file basename to generate the log upload location.
        :type memory_profile_upload_bucket: str
        :param data_archive_upload_bucket: The GS bucket name to upload the data archive file to.
                                            This name will be appended with the bucket_dir_path
                                            and the file basename to generate the archive upload location.
        :type data_archive_upload_bucket: str
        :param bucket_dir_path: The GS bucket folder path to store the data archive & memory log files in.
        :type bucket_dir_path: str
        :param drive_upload: Configuration for uploading to Google Drive, or None.
                             If None, does not upload to Google Drive.
        :type drive_upload: DriveUploadPaths | None
        """
        self.pipeline_name = pipeline_name
        self.raw_data_sources = raw_data_sources
        self.phone_number_uuid_table = phone_number_uuid_table
        self.timestamp_remappings = timestamp_remappings
        self.rapid_pro_key_remappings = rapid_pro_key_remappings
        self.project_start_date = project_start_date
        self.project_end_date = project_end_date
        self.filter_test_messages = filter_test_messages
        self.move_ws_messages = move_ws_messages
        self.drive_upload = drive_upload
        self.memory_profile_upload_bucket = memory_profile_upload_bucket
        self.data_archive_upload_bucket = data_archive_upload_bucket
        self.bucket_dir_path = bucket_dir_path

        PipelineConfiguration.RQA_CODING_PLANS = coding_plans.get_rqa_coding_plans(self.pipeline_name)
        PipelineConfiguration.DEMOG_CODING_PLANS = coding_plans.get_demog_coding_plans(self.pipeline_name)
        PipelineConfiguration.FOLLOW_UP_CODING_PLANS = coding_plans.get_follow_up_coding_plans(self.pipeline_name)
        PipelineConfiguration.SURVEY_CODING_PLANS += PipelineConfiguration.DEMOG_CODING_PLANS
        PipelineConfiguration.SURVEY_CODING_PLANS += PipelineConfiguration.FOLLOW_UP_CODING_PLANS
        PipelineConfiguration.WS_CORRECT_DATASET_SCHEME = coding_plans.get_ws_correct_dataset_scheme(self.pipeline_name)

        self.validate()
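
For reference, a minimal usage sketch of this first form of the constructor. Every value below is an illustrative stand-in (the RawDataSource, PhoneNumberUuidTable, and RapidProKeyRemapping constructors are not shown here), and validate(), whose checks are also not shown, may well reject placeholders such as None or empty lists:

from datetime import datetime

config = PipelineConfiguration(
    pipeline_name="example_pipeline",                    # illustrative name
    raw_data_sources=[],                                 # would hold RawDataSource instances
    phone_number_uuid_table=None,                        # stand-in for a PhoneNumberUuidTable
    timestamp_remappings=[],
    rapid_pro_key_remappings=[],                         # would hold RapidProKeyRemapping instances
    project_start_date=datetime(2020, 1, 1),
    project_end_date=datetime(2020, 3, 1),
    filter_test_messages=True,
    move_ws_messages=True,
    memory_profile_upload_bucket="example-memory-logs",  # GS bucket name
    data_archive_upload_bucket="example-data-archives",  # GS bucket name
    bucket_dir_path="2020/example_pipeline",
    drive_upload=None,                                   # skip the Google Drive upload
)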
Code example #2
    def __init__(self,
                 pipeline_name,
                 raw_data_sources,
                 phone_number_uuid_table,
                 timestamp_remappings,
                 rapid_pro_key_remappings,
                 project_start_date,
                 project_end_date,
                 filter_test_messages,
                 move_ws_messages,
                 memory_profile_upload_url_prefix,
                 data_archive_upload_url_prefix,
                 drive_upload=None):
        """
        :param pipeline_name: The name of this pipeline.
        :type pipeline_name: str
        :param raw_data_sources: List of sources to pull the various raw run files from.
        :type raw_data_sources: list of RawDataSource
        :param phone_number_uuid_table: Configuration for the Firestore phone number <-> uuid table.
        :type phone_number_uuid_table: PhoneNumberUuidTable
        :param timestamp_remappings: List of remappings to apply to message timestamps.
        :type timestamp_remappings: list of TimestampRemapping
        :param rapid_pro_key_remappings: List of rapid_pro_key -> pipeline_key remappings.
        :type rapid_pro_key_remappings: list of RapidProKeyRemapping
        :param project_start_date: When data collection started - all activation messages received before this
                                   datetime will be dropped.
        :type project_start_date: datetime.datetime
        :param project_end_date: When data collection stopped - all activation messages received on or after this
                                 datetime will be dropped.
        :type project_end_date: datetime.datetime
        :param filter_test_messages: Whether to filter out messages sent from the rapid_pro_test_contact_uuids.
        :type filter_test_messages: bool
        :param move_ws_messages: Whether to move messages labelled as Wrong Scheme to the correct dataset.
        :type move_ws_messages: bool
        :param memory_profile_upload_url_prefix: The prefix of the GS URL to upload the memory profile log to.
                                                 This prefix will be appended by the id of the pipeline run (provided
                                                 as a command line argument), and the ".profile" file extension.
        :type memory_profile_upload_url_prefix: str
        :param data_archive_upload_url_prefix: The prefix of the GS URL to upload the data archive to.
        :type data_archive_upload_url_prefix: str
        :param drive_upload: Configuration for uploading to Google Drive, or None.
                             If None, does not upload to Google Drive.
        :type drive_upload: DriveUploadPaths | None
        """
        self.pipeline_name = pipeline_name
        self.raw_data_sources = raw_data_sources
        self.phone_number_uuid_table = phone_number_uuid_table
        self.timestamp_remappings = timestamp_remappings
        self.rapid_pro_key_remappings = rapid_pro_key_remappings
        self.project_start_date = project_start_date
        self.project_end_date = project_end_date
        self.filter_test_messages = filter_test_messages
        self.move_ws_messages = move_ws_messages
        self.drive_upload = drive_upload
        self.memory_profile_upload_url_prefix = memory_profile_upload_url_prefix
        self.data_archive_upload_url_prefix = data_archive_upload_url_prefix

        PipelineConfiguration.RQA_CODING_PLANS = coding_plans.get_rqa_coding_plans(self.pipeline_name)
        PipelineConfiguration.SURVEY_CODING_PLANS = coding_plans.get_survey_coding_plans(self.pipeline_name)

        # TODO: This is a temporary COVID19-specific hack to extract demographics out of the surveys, so that the
        #       automated analysis can analyse demographics only. Not fixing this properly here for now, because we may
        #       instead decide to analyse all of the survey questions in the same way as we do the demographics.
        PipelineConfiguration.DEMOG_CODING_PLANS = coding_plans.get_survey_coding_plans(self.pipeline_name)[0:3]
        PipelineConfiguration.WS_CORRECT_DATASET_SCHEME = coding_plans.get_ws_correct_dataset_scheme(self.pipeline_name)

        self.validate()
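
The memory_profile_upload_url_prefix docstring above states that the final upload location is the prefix followed by the pipeline run id and a ".profile" extension. A sketch of that composition; the helper function is hypothetical rather than part of the class:

def memory_profile_upload_url(config, run_id):
    # Hypothetical helper: composes the GS upload URL as the
    # memory_profile_upload_url_prefix docstring describes:
    # prefix + pipeline run id + ".profile" extension.
    return f"{config.memory_profile_upload_url_prefix}{run_id}.profile"

# e.g. prefix "gs://example-bucket/memory-logs/" with run id "2020-04-01-abc"
# gives "gs://example-bucket/memory-logs/2020-04-01-abc.profile"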