Exemple #1
0
    def _completion_handler(self, status, message=''):
        """
        Completion callback.

        Marks the run as finished, restores the UI and logs the outcome
        (unless the application is quitting), then wakes every thread waiting
        on ``self._completion_lock``.

        :type status: int
        :param status: Value compared against the ``workerthread.CompletionStatus`` members.
        :type message: str
        :param message: Optional detail text appended to the logged status line.
        :return: None
        """
        self.running = False

        if not self._want_quit:
            status_message = ''
            if status == workerthread.CompletionStatus.ABORT:
                status_message = 'Computation successfully aborted. '
            elif status == workerthread.CompletionStatus.DONE:
                open_folder(self.output_folder_path)
                status_message = 'Processing complete. '
            elif status == workerthread.CompletionStatus.FAIL:
                open_folder(self.output_folder_path)
                status_message = 'Processing failed. '

            # The status texts end with a space so that the caller-supplied
            # detail can follow them; without this the detail is lost.
            if message:
                status_message += message

            # ``int(not status)`` is 1 when ``status`` is falsy and 0 otherwise.
            status_notifier.update({
                'progress': (int(not status), 1),
                'sub_progress': (int(not status), 1)
            })
            self.action_button.Enable(True)
            self.action_button.SetLabel('Start')
            self.enable_ui(True)
            status_logger.info(status_message)

            self.StatusBar.SetStatusText('')

        # Notify even when quitting, so waiters are never left blocked.
        with self._completion_lock:
            self._completion_lock.notify_all()
Exemple #2
0
 def on_abort(self):
     """Request an abort from the current worker; do nothing when there is none."""
     if not self.worker:
         return

     # The abort is only requested here, and the button is disabled
     # immediately afterwards.
     status_logger.info('Attempting to cancel, please wait...')
     self.worker.abort()
     self.action_button.Enable(False)
 def _show_message(self, message_data):
     """
     Report a message by writing it to the status log.

     :param message_data: Passed to the status logger unchanged.
         NOTE(review): previously documented as a (message, style) sequence - confirm with callers.
     :type message_data: (list, set, tuple)
     """
     status_logger.info(message_data)
Exemple #4
0
    def run(self):
        """Apply the logic rules to every row and write the result file.

        Reads the input file, expands each row with ``ADDITIONAL_DATA`` and
        stores the ``CAUSE_ID`` of the first rule whose ``logic_rule`` returns
        ``True`` under ``RULES_CAUSE_NUM_KEY``. A rule that raises is logged
        as a warning and the next rule is tried.

        Returns:
            The matrix read from the input file, with its rows updated in place.
        """
        super(RulesPrep, self).run()

        status_logger.info('{} :: Processing rules data'.format(
            self.AGE_GROUP.capitalize()))
        status_notifier.update({'progress': 1})

        headers, matrix = DataPrep.read_input_file(self.input_file_path())

        headers.extend(ADDITIONAL_DATA.keys())

        status_notifier.update({'sub_progress': (0, len(matrix))})

        for index, row in enumerate(matrix):
            self.check_abort()

            status_notifier.update({'sub_progress': (index, )})

            self.expand_row(row, ADDITIONAL_DATA)

            for rule in self.rules:
                try:
                    # Only an explicit ``True`` counts as a match.
                    if rule.logic_rule(row) is True:
                        row[RULES_CAUSE_NUM_KEY] = rule.CAUSE_ID
                        break
                except Exception as e:
                    # Format the exception itself: ``e.message`` does not
                    # exist on Python 3 exceptions and would raise here.
                    warning_logger.warning(
                        'SID: {} rule `{}` failed complete: {}'.format(
                            row['sid'], rule, e))

        status_notifier.update({'sub_progress': None})

        DataPrep.write_output_file(headers, matrix, self.output_file_path())

        return matrix
Exemple #5
0
    def run(self):
        """Reorganize the output folder if requested, then run the clean-up step.

        When ``self.reorganize`` is set, the four output folders and the
        intermediates folder are created under ``self.working_dir_path`` and
        the ``organize_folder*`` steps run in order. ``clean_up`` is called
        afterwards whenever ``self.keep_orig`` is false.
        """
        if self.reorganize:
            status_logger.info('Preparing output files.')

            folder_names = (FOLDER1, FOLDER2, FOLDER3, FOLDER4,
                            INTERMEDIATES_FOLDER)
            for folder_name in folder_names:
                self.make_dir(self.working_dir_path, folder_name)

            organizers = (
                self.organize_folder1,
                self.organize_folder2,
                self.organize_folder3,
                self.organize_folder4,
            )
            for organize in organizers:
                organize()

        if self.keep_orig:
            return
        self.clean_up()
Exemple #6
0
    def run(self):
        """Map WHO questionnaire rows and write them out under a sorted header list.

        The output headers are collected from the data module's question
        tables. Every row of the input file is passed through the mapping
        steps in a fixed order, then the matrix is written to
        ``self.output_file_path(None)``. The headers of the input file are
        not used for the output.
        """
        super(WHOPrep, self).run()

        status_logger.info('Mapping WHO Questionnaire')
        status_notifier.update({'progress': 1})

        spec = self.data_module
        headers = set(spec.ADDITIONAL_HEADERS)
        headers.update(spec.YES_NO_QUESTIONS)
        headers.update(name for name, _ in spec.RECODE_QUESTIONS)
        headers.update(spec.RENAME_QUESTIONS)
        headers.update(spec.REVERSE_ONE_HOT_MULTISELECT)
        headers.update(name for name, _ in spec.RECODE_MULTISELECT)
        headers.update(spec.ONE_HOT_FROM_MULTISELECT)
        headers.update(spec.UNIT_IF_AMOUNT)
        for unit_col, value_col, _ in spec.DURATION_CONVERSIONS:
            headers.add(unit_col)
            headers.add(value_col)

        _, matrix = DataPrep.read_input_file(self.input_file_path())

        status_notifier.update({'sub_progress': (0, len(matrix))})

        # Per-row mapping steps, applied in exactly this order.
        row_steps = (
            self.determine_consent,
            self.calculate_age,
            self.recode_yes_no_questions,
            self.recode_categoricals,
            self.rename_questions,
            self.reverse_one_hot_multiselect,
            self.recode_multiselects,
            self.encode_one_hot_from_multiselect,
            self.map_units_from_values,
            self.convert_durations,
            self.map_adult_chest_pain_duration,
            self.map_child_illness_duration,
            self.map_neonate_first_cry,
            self.map_child_unconsciousness_start,
            self.map_neonate_delivery_type,
            self.map_child_birth_size,
            self.map_redundant_child_age_data,
        )

        for index, row in enumerate(matrix):
            self.check_abort()

            status_notifier.update({'sub_progress': (index,)})

            for step in row_steps:
                step(row)

        status_notifier.update({'sub_progress': None})

        DataPrep.write_output_file(sorted(headers), matrix,
                                   self.output_file_path(None))
def _completion_handler(status, message=''):
    """
    Completion event handler for command-line runs.

    Logs a line for the matching completion status, logs ``message`` when one
    is given, then calls ``sys.exit(status)``.
    :type status: int
    :type message: str
    """
    outcomes = (
        (workerthread.CompletionStatus.ABORT, 'Computation aborted.'),
        (workerthread.CompletionStatus.DONE, 'Process completed.'),
        (workerthread.CompletionStatus.FAIL,
         'Unknown error occurred during processing.'),
    )
    # First matching status wins; an unrecognised status logs nothing here.
    for known_status, text in outcomes:
        if status == known_status:
            status_logger.info(text)
            break

    if message:
        status_logger.info(message)
    sys.exit(status)
Exemple #8
0
 def _update_status(self):
     """Log that cause graphs are being made and send a ``progress`` update."""
     status_logger.info('Making cause graphs')
     progress_update = {'progress': 1}
     status_notifier.update(progress_update)
 def _update_status(self):
     """Run the parent status update, then log and report the CSMF graph step."""
     super(CSMFGrapher, self)._update_status()
     status_logger.info('Making CSMF graphs')
     progress_update = {'progress': 1}
     status_notifier.update(progress_update)
    def run(self):
        """Perform initial processing step for preparing input data.

        Reads the input file, extends the headers, and runs every consenting
        row through the common conversions before passing it to ``save_row``.
        Rows that fail the consent check are logged and skipped. A row whose
        age variables are missing triggers an ``'abort'`` notification and is
        skipped as well.

        Returns:
            tuple(bool): Tuple of bool values if VAs are present for Adult, Child, and Neonate.
        """
        super(CommonPrep, self).run()

        status_logger.info('Initial data prep')
        status_notifier.update({'progress': 1})

        headers, matrix = DataPrep.read_input_file(self.input_file_path())

        status_notifier.update({'sub_progress': (0, len(matrix))})

        # Extend the headers with additional headers and read the remaining data into the matrix
        additional_data = {k: '' for k in ADDITIONAL_HEADERS}
        if self.short_form:
            additional_data.update(SHORT_FORM_ADDITIONAL_HEADERS_DATA)
        additional_headers, additional_values = additional_headers_and_values(
            headers, additional_data.items())

        headers.extend(additional_headers)
        if 'child_1_8a' not in headers:
            headers.append('child_1_8a')

        for index, row in enumerate(matrix):
            self.check_abort()

            status_notifier.update({'sub_progress': (index, )})

            self.check_sids(row, index)

            if not self.check_consent(row, CONSENT_HEADER, index):
                warning_logger.info('SID: {} Refused consent.'.format(
                    row['sid']))
                continue

            self.expand_row(row,
                            dict(zip(additional_headers, additional_values)))

            self.correct_missing_age(row)

            try:
                self.convert_cell_to_int(row, AGE_VARS.values())
            except KeyError as e:
                # ``e.message`` does not exist on Python 3 exceptions; the
                # missing key is the exception's first argument.
                missing_key = e.args[0] if e.args else e
                warning_logger.error('Missing age variable: {}'.format(
                    missing_key))
                missing_vars = [
                    var for var in AGE_VARS.values() if var not in headers
                ]
                status_logger.info('Cannot process data without: {}'.format(
                    ', '.join(missing_vars)))
                status_notifier.update('abort')
                continue

            for header, mapping in BINARY_CONVERSION_MAP.items():
                self.process_multiselect_vars(row, header, mapping)

            for header in COUNT_DATA_HEADERS:
                self.process_count_data(row, header)

            self.convert_rash_data(row, RASH_DATA)

            self.convert_weight_data(row, WEIGHT_CONVERSION_DATA)

            self.convert_free_text(row, FREE_TEXT_VARS, WORD_SUBS)

            self.save_row(row, index)

        status_notifier.update({'sub_progress': None})

        self.write_data(headers, self._matrix_data)

        # One flag per age group: true when that group's entry is non-empty.
        return bool(self._matrix_data[ADULT]), bool(
            self._matrix_data[CHILD]), bool(self._matrix_data[NEONATE])
def main(*args, **kwargs):
    """Run an analysis from the command line.

    Logs the selected options, registers the command-line notification
    handler, creates the worker thread and joins it, then exits with the
    worker's ``completion_status`` (0 when the attribute is absent).
    Positional arguments are accepted but not used.
    """
    configure_logger()

    # (log template, kwargs key) pairs, logged in this order.
    option_lines = (
        ('- Input file: {}', 'input'),
        ('- Output folder: {}', 'output'),
        ('- Country: {}', 'country'),
        ('- HIV Region: {}', 'hiv'),
        ('- Malaria Region: {}', 'malaria'),
        ('- HCE variables: {}', 'hce'),
        ('- Free text variables: {}', 'freetext'),
        ('- Generate figures: {}', 'figures'),
    )
    status_logger.info('')
    status_logger.info('Starting analysis with options:')
    for template, key in option_lines:
        status_logger.info(template.format(kwargs[key]))
    status_logger.info('')

    # Note - does not work on Windows with Python 2.7, does work elsewhere.
    _init_handle_shutdown()

    status_notifier.register(CommandLineNotificationHandler())

    # (option name, kwargs key) pairs; each key is removed from kwargs.
    option_sources = (
        ('hce', 'hce'),
        ('free_text', 'freetext'),
        ('hiv', 'hiv'),
        ('malaria', 'malaria'),
        ('figures', 'figures'),
        ('language', 'language'),
        ('legacy_format', 'legacy_format'),
    )
    options = {}
    for option_name, arg_name in option_sources:
        options[option_name] = kwargs.pop(arg_name)

    global worker
    worker = workerthread.WorkerThread(
        kwargs['input'],
        kwargs['output'],
        options,
        kwargs['country'],
        completion_callback=_completion_handler,
    )
    worker.join()

    exit_code = getattr(worker, 'completion_status', 0)
    sys.exit(exit_code)
Exemple #12
0
    def run(self):
        """Score the user data against the tariff matrix and write all outputs.

        Loads the tariff matrix (minus the dropped headers), processes the
        validated data, scores and ranks the user data, then runs the masking,
        prediction, likelihood and CSMF steps. Predictions, CSMF files
        (overall and per sex) and intermediate score/rank files are written
        along the way.

        Returns:
            The scored user data.
        """
        super(TariffPrep, self).run()

        group_label = self.AGE_GROUP.capitalize()

        status_logger.info('{:s} :: Processing tariffs'.format(group_label))
        status_notifier.update({'progress': 1})

        data = self.data_module

        # Headers are being dropped only from tariff matrix now because of the
        # way we are iterating over the pruned tariff data. It is unnecessary
        # to drop headers from other matrices.
        drop_headers = {TARIFF_CAUSE_NUM_KEY}
        if not self.hce:
            drop_headers.update(data.HCE_DROP_LIST)
        if not self.free_text:
            drop_headers.update(data.FREE_TEXT)
        if self.short_form:
            drop_headers.update(data.SHORT_FORM_DROP_LIST)

        tariffs = get_tariff_matrix(self.tariffs_filename, drop_headers,
                                    data.SPURIOUS_ASSOCIATIONS)

        self.cause_list = sorted(tariffs.keys())

        _, validated = self.read_input_file(self.validated_filename)

        status_logger.info(
            '{:s} :: Processing validation data.'.format(group_label))
        (uniform_train, uniform_scores, uniform_ranks, cutoffs,
         likelihoods) = self.process_training_data(
             validated, tariffs, data.FREQUENCIES, data.CUTOFF_POS,
             [.25, .5, .75])

        self.write_cutoffs(cutoffs)

        status_logger.info(
            '{:s} :: Generating VA cause list.'.format(group_label))
        _, raw_user_data = self.read_input_file(self.input_file_path())
        user_data = self.score_symptom_data(raw_user_data, tariffs)

        status_logger.info(
            '{:s} :: Generating cause rankings.'.format(group_label))
        self.generate_cause_rankings(user_data, uniform_scores)

        self.write_intermediate_file(user_data, 'external-ranks', 'ranks')

        train_size = len(uniform_train)
        lowest_rank = train_size + 0.5

        self.mask_ranks(user_data, train_size, cutoffs,
                        data.CAUSE_CONDITIONS, lowest_rank,
                        data.UNIFORM_LIST_POS, data.MIN_CAUSE_SCORE)

        self.predict(user_data, lowest_rank, data.CAUSE_REDUCTION,
                     data.CAUSES, data.CAUSES46)

        self.determine_likelihood(user_data, likelihoods,
                                  data.CAUSE_REDUCTION)

        undetermined_weights = self._get_undetermined_matrix()
        csmf, csmf_by_sex = self.calculate_csmf(user_data,
                                                undetermined_weights)

        self.write_predictions(user_data)

        likelihood_names = [
            'Very Likely', 'Likely', 'Somewhat Likely', 'Possible'
        ]
        translation = None
        if self.language != 'english':
            # Non-English runs load a per-language JSON file from the data folder.
            translation_path = os.path.join(
                config.basedir, 'data', '{}.json'.format(self.language))
            with open(translation_path, 'rb') as f:
                translation = json.load(f)
            translated = translation['likelihoods']
            likelihood_names = [
                translated.get(name) for name in likelihood_names
            ]

        colors = ['#3CB371', '#47d147', '#8ae600', '#e6e600']
        mp = self.write_multiple_predictions_xlsx(
            user_data, tariffs, likelihood_names, colors, translation)
        self.write_multiple_predictions_csv(mp)

        self.write_csmf(self.AGE_GROUP, csmf)
        sex_name = {1: 'male', 2: 'female'}
        for sex, csmf_data in csmf_by_sex.items():
            self.write_csmf('-'.join([self.AGE_GROUP, sex_name[sex]]),
                            csmf_data)

        self.write_intermediate_file(user_data, 'tariff-scores', 'scores')

        self.write_intermediate_file(user_data, 'tariff-ranks', 'ranks')

        return user_data
Exemple #13
0
    def run(self):
        """Run the whole analysis and report the result through ``self._complete``.

        Cleans the input headers, detects the questionnaire form, builds the
        preparation steps for each age group and runs them in order for every
        group that has data. Completion is reported as ``FAIL`` when the
        source file has no data or a step raises, ``ABORT`` on
        ``AbortException``, and ``DONE`` otherwise.
        """
        status_logger.info('Preparing variable headers.')
        status_notifier.update({'progress': (0, 15), 'sub_progress': None})

        out_dir = self.output_dir_path
        intermediate_dir = intermediate_dir_path(out_dir)
        figures_dir = os.path.join(out_dir, 'figures')

        self.make_dir(intermediate_dir_path(out_dir))

        file_path = os.path.join(intermediate_dir, CLEAN_HEADERS_FILENAME)
        try:
            self.format_headers(self.input_file_path, file_path)
        except StopIteration:
            # File doesn't contain data
            message = 'Source file "{}" does not contain data.'.format(self.input_file_path)
            self._complete(CompletionStatus.FAIL, message)
            warning_logger.warning(message)
            return

        report_lines = (
            ('- Input file: {}', self.input_file_path),
            ('- Output folder: {}', out_dir),
            ('- Country: {}', self.country),
            ('- HIV Region: {}', self.options.get('hiv', True)),
            ('- Malaria Region: {}', self.options.get('malaria', True)),
        )
        report_logger.info('Analysis parameters:')
        for template, value in report_lines:
            report_logger.info(template.format(value))
        report_logger.info('')

        who_questionnaire = self.who_questionaire_test(file_path)

        if who_questionnaire:
            self.short_form = True
            form_name = 'WHO 2016 Questionnaire'
        else:
            self.short_form = self.short_form_test(file_path)
            warning_logger.debug('Detected {} form'.format(
                'short' if self.short_form else 'standard'))
            form_name = ('PHMRC Shortened Questionnaire' if self.short_form
                         else 'PHMRC Full Questionnaire')
        report_logger.info('Detected {}'.format(form_name))

        who_prep = WHOPrep(out_dir)
        common_prep = CommonPrep(out_dir, self.short_form)

        # One entry per age group, in adult / child / neonate order.
        group_inputs = (
            (adult_pre_symptom_data, common_data.ADULT, ADULT_RULES,
             adult_symptom_data, adult_tariff_data),
            (child_pre_symptom_data, common_data.CHILD, CHILD_RULES,
             child_symptom_data, child_tariff_data),
            (neonate_pre_symptom_data, common_data.NEONATE, NEONATE_RULES,
             neonate_symptom_data, neonate_tariff_data),
        )
        # Each pipeline is (pre-symptom, logic rules, symptom, tariff results).
        pipelines = []
        for pre_symptom_data, age_group, rules, symptom_data, tariff_data in group_inputs:
            pipelines.append((
                PreSymptomPrep(pre_symptom_data, out_dir, self.short_form),
                RulesPrep(out_dir, self.short_form, age_group, rules),
                SymptomPrep(symptom_data, out_dir, self.short_form),
                TariffPrep(tariff_data, out_dir, self.short_form,
                           self.options, self.country),
            ))

        legacy = self.options.get('legacy_format', False)
        output = OutputPrep(out_dir, reorganize=not legacy,
                            keep_orig=legacy, short_form=self.short_form,
                            free_text=self.options.get('free_text', True),
                            hce=self.options.get('hce', True))
        cause_grapher = CauseGrapher(out_dir)
        csmf_grapher = CSMFGrapher(out_dir)

        # ``output`` is not added to the abort list.
        abortable = [who_prep, common_prep]
        for steps in pipelines:
            abortable.extend(steps)
        abortable.extend([cause_grapher, csmf_grapher])
        self._abort_list.extend(abortable)

        try:
            if who_questionnaire:
                who_prep.run()

            # makes adult-prepped.csv, child-prepped.csv, neonate-prepped.csv
            adult_data, child_data, neonate_data = common_prep.run()
            has_data = (adult_data, child_data, neonate_data)

            # For each age group with data: makes <group>-presymptom.csv,
            # <group>-logic-rules.csv, <group>-symptom.csv, then the output files.
            for present, steps in zip(has_data, pipelines):
                if not present:
                    continue
                for step in steps:
                    step.run()

            if self.options.get('figures') and any(has_data):
                self.make_dir(figures_dir)
                # generate all cause graphs
                cause_grapher.run()
                # generate all csmf graphs
                csmf_grapher.run()

            output.run()

        except AbortException:
            self._complete(CompletionStatus.ABORT)
        except Exception:
            traceback.print_exc()
            self._complete(CompletionStatus.FAIL)
        else:
            self._complete(CompletionStatus.DONE)
Exemple #14
0
    def run(self):
        """Convert the rows of the input file into symptom data and write them out.

        Adds the generated variables to the headers, renames headers, keeps
        only headers matching ``KEEP_PATTERN`` that are not in ``DROP_LIST``,
        and runs every row through the symptom conversion steps.

        Returns:
            The matrix read from the input file, after the per-row steps.
        """
        super(SymptomPrep, self).run()

        status_logger.info('{} :: Processing symptom data'.format(
            self.AGE_GROUP.capitalize()))
        status_notifier.update({'progress': 1})

        headers, matrix = DataPrep.read_input_file(self.input_file_path())

        status_notifier.update({'sub_progress': (0, len(matrix))})

        data = self.data_module

        additional_data = {}
        additional_data.update(data.GENERATED_VARS_DATA)
        additional_headers, additional_values = additional_headers_and_values(
            headers, additional_data.items())

        headers.extend(additional_headers)
        self.rename_headers(headers, data.VAR_CONVERSION_MAP)

        def header_order(name):
            # 'sid' first, then names without a digit at position 1, then by name.
            return (name != 'sid', name[1].isdigit(), name)

        drop_list = data.DROP_LIST
        kept_headers = [
            name for name in headers
            if re.match(data.KEEP_PATTERN, name) and name not in drop_list
        ]
        headers = sorted(kept_headers, key=header_order)

        for index, row in enumerate(matrix):
            self.check_abort()

            status_notifier.update({'sub_progress': (index, )})

            new_columns = dict(zip(additional_headers, additional_values))
            self.expand_row(row, new_columns)
            self.rename_vars(row, data.VAR_CONVERSION_MAP)

            self.copy_variables(row, data.COPY_VARS)

            # Compute age quartiles.
            self.process_progressive_value_data(
                row, data.AGE_QUARTILE_BINARY_VARS.items())

            self.process_cutoff_data(row, data.DURATION_CUTOFF_DATA.items())

            self.process_injury_data(row, data.INJURY_VARS.items())

            # Dichotomize!
            self.process_binary_vars(row, data.BINARY_CONVERSION_MAP.items())

            # Ensure all binary variables actually ARE 0 or 1:
            self.post_process_binary_variables(row, data.BINARY_VARS)

            self.censor_causes(row, data.CENSORED_MAP)

            self.require_symptoms(row, data.REQUIRED_MAP)

        status_notifier.update({'sub_progress': None})

        DataPrep.write_output_file(headers, matrix, self.output_file_path())

        return matrix
    def run(self):
        """Convert the rows of the input file into pre-symptom data and write them out.

        Adds the duration, generated and free-text word variables to the
        headers, prunes and sorts the headers, then runs every row through
        the validation and conversion steps.

        Returns:
            The matrix read from the input file, after the per-row steps.
        """
        super(PreSymptomPrep, self).run()

        status_logger.info('{} :: Processing pre-symptom data'.format(
            self.AGE_GROUP.capitalize()))
        status_notifier.update({'progress': 1})

        data = self.data_module

        # Work on a copy of the duration variables; the short form drops some of them.
        duration_vars = data.DURATION_VARS[:]
        if self.short_form:
            for dropped in data.DURATION_VARS_SHORT_FORM_DROP_LIST:
                duration_vars.remove(dropped)

        headers, matrix = DataPrep.read_input_file(self.input_file_path())

        status_notifier.update({'sub_progress': (0, len(matrix))})

        # Collect the new headers and their default values.
        additional_data = dict.fromkeys(data.DURATION_VARS, '')
        duration_day_vars = getattr(data, 'DURATION_DAYS_VARS', [])
        additional_data.update(dict.fromkeys(duration_day_vars, ''))
        additional_data.update(dict.fromkeys(data.GENERATED_VARS_DATA, 0))
        additional_data.update(
            dict.fromkeys(sorted(data.WORDS_TO_VARS.values()), 0))
        additional_headers, additional_values = additional_headers_and_values(
            headers, additional_data.items())

        headers.extend(additional_headers)
        self.rename_headers(headers, data.VAR_CONVERSION_MAP)

        # The 'a' and 'b' parts of every duration variable are dropped.
        drop_list = [
            template.format(name)
            for template in ('{}a', '{}b')
            for name in duration_vars
        ]

        def header_order(name):
            # 'sid' first, then names without a digit at position 1, then
            # names starting with 'g', then by name.
            return (name != 'sid', name[1].isdigit(),
                    not name.startswith('g'), name)

        # Keep only headers matching the keep pattern that are not dropped.
        kept_headers = [
            name for name in headers
            if re.match(data.KEEP_PATTERN, name) and name not in drop_list
        ]
        headers = sorted(kept_headers, key=header_order)

        for index, row in enumerate(matrix):
            self.check_abort()

            status_notifier.update({'sub_progress': (index, )})
            new_columns = dict(zip(additional_headers, additional_values))
            self.expand_row(row, new_columns)
            self.rename_vars(row, data.VAR_CONVERSION_MAP)

            self.verify_answers_for_row(row, RANGE_LIST)

            self.fix_agedays(row)

            self.calculate_age_at_death_value(row)

            self.recode_answers(row, data.RECODE_MAP)

            self.process_binary_vars(row, data.BINARY_CONVERSION_MAP.items())

            self.calculate_duration_vars(row, duration_vars,
                                         data.DURATION_VARS_SPECIAL_CASE)

            self.validate_days_vars(row, duration_day_vars)

            self.validate_weight_vars(row, data.WEIGHT_VARS)

            self.validate_date_vars(row, data.DATE_VARS)

            self.process_age_vars(row)

            self.convert_free_text_vars(row, data.FREE_TEXT_VARS,
                                        data.WORDS_TO_VARS)

            if self.short_form:
                # Collect the words whose short-form source column equals 1.
                word_list = [
                    word for column, word in
                    data.SHORT_FORM_FREE_TEXT_CONVERSION.items()
                    if value_or_default(row.get(column)) == 1
                ]
                if word_list:
                    self.convert_free_text_words(row, word_list,
                                                 data.WORDS_TO_VARS)

            self.fix_rash_length(row)

            self.fix_rash_location(row)

            self.process_weight_sd_vars(
                row, getattr(data, 'EXAM_DATE_VARS', {}),
                getattr(data, 'WEIGHT_SD_DATA', {}))

            self.fill_missing_data(row, self.default_fill)

        status_notifier.update({'sub_progress': None})

        DataPrep.write_output_file(headers, matrix, self.output_file_path())

        return matrix