def _completion_handler(self, status, message=''):
    """
    Completion callback: put the UI back into its idle state.

    ``self.running`` is always cleared. Unless ``self._want_quit`` is set, a
    summary line is logged, the progress indicators are updated, the action
    button is re-enabled and relabelled, and the status bar text is cleared.
    For the DONE and FAIL statuses the output folder is opened as well.
    Finally, everything waiting on ``self._completion_lock`` is notified.

    :type status: int
    :param status: Compared against ``workerthread.CompletionStatus.ABORT``,
        ``DONE`` and ``FAIL``; any other value logs an empty summary.
    :type message: str
    :param message: Accepted so the callback signature matches what the
        worker passes; this handler does not display it.
    """
    self.running = False

    if not self._want_quit:
        status_message = ''

        if status == workerthread.CompletionStatus.ABORT:
            status_message = 'Computation successfully aborted. '
        elif status == workerthread.CompletionStatus.DONE:
            open_folder(self.output_folder_path)
            status_message = 'Processing complete. '
        elif status == workerthread.CompletionStatus.FAIL:
            open_folder(self.output_folder_path)
            status_message = 'Processing failed. '

        # ``int(not status)`` is 1 for a falsy status and 0 for any truthy
        # one, so the bars read "1 of 1" or "0 of 1" respectively.
        status_notifier.update({
            'progress': (int(not status), 1),
            'sub_progress': (int(not status), 1)
        })

        self.action_button.Enable(True)
        self.action_button.SetLabel('Start')
        self.enable_ui(True)

        status_logger.info(status_message)
        self.StatusBar.SetStatusText('')

    # Notify even when quitting, so a waiter on the lock is not left blocked.
    # ``notify_all`` is the non-deprecated spelling of ``notifyAll``.
    with self._completion_lock:
        self._completion_lock.notify_all()
def on_abort(self):
    """
    Ask the current worker, if any, to cancel its run.

    When ``self.worker`` is falsy nothing happens. Otherwise a notice is
    logged, the worker's ``abort`` is called and the action button is
    disabled.
    """
    if not self.worker:
        return

    # A running thread is asked to stop rather than being killed outright.
    status_logger.info('Attempting to cancel, please wait...')
    self.worker.abort()
    self.action_button.Enable(False)
def _show_message(self, message_data):
    """
    Report a message by writing it to the status log.

    No dialog is opened here: ``message_data`` is passed to
    ``status_logger.info`` unchanged.

    :param message_data: Described by the original author as a list, set, or
        tuple with the first pos as the message, and second pos as the style;
        this method does not unpack it.
    :type message_data: (list, set, tuple)
    """
    status_logger.info(message_data)
def run(self):
    """
    Apply the logic rules to every row of the input file.

    Each row is expanded with ``ADDITIONAL_DATA``, then the rules in
    ``self.rules`` are tried in order. The first rule whose ``logic_rule``
    returns ``True`` has its ``CAUSE_ID`` stored under
    ``RULES_CAUSE_NUM_KEY`` and the remaining rules are skipped for that
    row. A rule that raises is reported on the warning logger and the next
    rule is tried.

    :return: The processed matrix, which is also written to
        ``self.output_file_path()``.
    """
    super(RulesPrep, self).run()

    status_logger.info('{} :: Processing rules data'.format(
        self.AGE_GROUP.capitalize()))
    status_notifier.update({'progress': 1})

    headers, matrix = DataPrep.read_input_file(self.input_file_path())
    headers.extend(ADDITIONAL_DATA.keys())

    status_notifier.update({'sub_progress': (0, len(matrix))})

    for index, row in enumerate(matrix):
        self.check_abort()
        status_notifier.update({'sub_progress': (index, )})

        self.expand_row(row, ADDITIONAL_DATA)

        for rule in self.rules:
            try:
                # Only a literal ``True`` counts as a match.
                if rule.logic_rule(row) is True:
                    row[RULES_CAUSE_NUM_KEY] = rule.CAUSE_ID
                    break
            except Exception as e:
                # Format the exception itself rather than ``e.message``:
                # that attribute is gone in Python 3 (reading it here would
                # raise inside the handler) and is empty in Python 2 when
                # the exception carries more than one argument.
                warning_logger.warning(
                    'SID: {} rule `{}` failed complete: {}'.format(
                        row['sid'], rule, e))

    status_notifier.update({'sub_progress': None})

    DataPrep.write_output_file(headers, matrix, self.output_file_path())

    return matrix
def run(self):
    """
    Arrange the output files into their final folders.

    Does nothing unless ``self.reorganize`` is set. Otherwise the four
    output folders and the intermediates folder are created under
    ``self.working_dir_path``, each ``organize_folderN`` step runs in order,
    and ``clean_up`` is called unless ``self.keep_orig`` is set.
    """
    if not self.reorganize:
        return

    status_logger.info('Preparing output files.')

    wanted_folders = (FOLDER1, FOLDER2, FOLDER3, FOLDER4,
                      INTERMEDIATES_FOLDER)
    for folder_name in wanted_folders:
        self.make_dir(self.working_dir_path, folder_name)

    self.organize_folder1()
    self.organize_folder2()
    self.organize_folder3()
    self.organize_folder4()

    if self.keep_orig:
        return
    self.clean_up()
def run(self):
    """
    Map a WHO questionnaire file row by row and write the result.

    The output header set is collected from the mapping tables on
    ``self.data_module``. Every row then goes through the recoding and
    mapping steps in a fixed order. The headers are written sorted, to
    ``self.output_file_path(None)``.
    """
    super(WHOPrep, self).run()
    status_logger.info('Mapping WHO Questionnaire')
    status_notifier.update({'progress': 1})

    # Collect every column name the mapping steps may produce.
    module = self.data_module
    output_headers = set(module.ADDITIONAL_HEADERS)
    output_headers.update(module.YES_NO_QUESTIONS)
    output_headers.update(name for name, _ in module.RECODE_QUESTIONS)
    output_headers.update(module.RENAME_QUESTIONS)
    output_headers.update(module.REVERSE_ONE_HOT_MULTISELECT)
    output_headers.update(name for name, _ in module.RECODE_MULTISELECT)
    output_headers.update(module.ONE_HOT_FROM_MULTISELECT)
    output_headers.update(module.UNIT_IF_AMOUNT)
    for unit_col, value_col, _ in module.DURATION_CONVERSIONS:
        output_headers.add(unit_col)
        output_headers.add(value_col)

    # The headers found in the input file are not used.
    _, matrix = DataPrep.read_input_file(self.input_file_path())
    status_notifier.update({'sub_progress': (0, len(matrix))})

    for position, row in enumerate(matrix):
        self.check_abort()
        status_notifier.update({'sub_progress': (position,)})

        self.determine_consent(row)
        self.calculate_age(row)

        # Table-driven recoding.
        self.recode_yes_no_questions(row)
        self.recode_categoricals(row)
        self.rename_questions(row)
        self.reverse_one_hot_multiselect(row)
        self.recode_multiselects(row)
        self.encode_one_hot_from_multiselect(row)
        self.map_units_from_values(row)
        self.convert_durations(row)

        # Question-specific mappings.
        self.map_adult_chest_pain_duration(row)
        self.map_child_illness_duration(row)
        self.map_neonate_first_cry(row)
        self.map_child_unconsciousness_start(row)
        self.map_neonate_delivery_type(row)
        self.map_child_birth_size(row)
        self.map_redundant_child_age_data(row)

    status_notifier.update({'sub_progress': None})

    DataPrep.write_output_file(sorted(output_headers), matrix,
                               self.output_file_path(None))
def _completion_handler(status, message=''):
    """
    Completion event handler for the command line.

    Logs one line for a recognised ``workerthread.CompletionStatus`` value,
    then ``message`` when it is non-empty, and exits the process with
    ``status`` as the exit code.

    :type status: int
    """
    outcomes = workerthread.CompletionStatus

    if status == outcomes.ABORT:
        summary = 'Computation aborted.'
    elif status == outcomes.DONE:
        summary = 'Process completed.'
    elif status == outcomes.FAIL:
        summary = 'Unknown error occurred during processing.'
    else:
        summary = None

    if summary is not None:
        status_logger.info(summary)

    if message:
        status_logger.info(message)

    sys.exit(status)
def _update_status(self):
    """Log the cause-graph step and send a ``{'progress': 1}`` update."""
    step_label = 'Making cause graphs'
    status_logger.info(step_label)

    progress_update = {'progress': 1}
    status_notifier.update(progress_update)
def _update_status(self):
    """
    Run the parent's status update, then report the CSMF-graph step.

    After the parent call, this logs its own line and sends a
    ``{'progress': 1}`` update to the status notifier.
    """
    super(CSMFGrapher, self)._update_status()

    step_label = 'Making CSMF graphs'
    status_logger.info(step_label)

    progress_update = {'progress': 1}
    status_notifier.update(progress_update)
def run(self):
    """Perform initial processing step for preparing input data.

    Reads the input file, extends the headers with the additional (and,
    for the short form, short-form specific) columns, then cleans each row
    and stores it with ``save_row``. Rows that fail the consent check are
    skipped with a log entry. Rows for which an age variable cannot be
    converted are skipped too, after the missing variables are reported
    and an ``'abort'`` update is sent to the status notifier.

    Returns:
        tuple(bool): Tuple of bool values if VAs are present for Adult,
        Child, and Neonate.
    """
    super(CommonPrep, self).run()

    status_logger.info('Initial data prep')
    status_notifier.update({'progress': 1})

    headers, matrix = DataPrep.read_input_file(self.input_file_path())

    status_notifier.update({'sub_progress': (0, len(matrix))})

    # Extend the headers with additional headers and read the remaining
    # data into the matrix
    additional_data = {k: '' for k in ADDITIONAL_HEADERS}
    if self.short_form:
        additional_data.update(SHORT_FORM_ADDITIONAL_HEADERS_DATA)
    additional_headers, additional_values = additional_headers_and_values(
        headers, additional_data.items())

    headers.extend(additional_headers)
    if 'child_1_8a' not in headers:
        headers.append('child_1_8a')

    for index, row in enumerate(matrix):
        self.check_abort()
        status_notifier.update({'sub_progress': (index, )})

        self.check_sids(row, index)

        if not self.check_consent(row, CONSENT_HEADER, index):
            warning_logger.info('SID: {} Refused consent.'.format(
                row['sid']))
            continue

        self.expand_row(row, dict(zip(additional_headers,
                                      additional_values)))

        self.correct_missing_age(row)

        try:
            self.convert_cell_to_int(row, AGE_VARS.values())
        except KeyError as e:
            # ``e.message`` does not exist on Python 3, so reading it here
            # would raise inside the handler. ``args[0]`` holds the same
            # value (the missing key) on both major versions.
            missing_key = e.args[0] if e.args else e
            warning_logger.error('Missing age variable: {}'.format(
                missing_key))
            missing_vars = [
                var for var in AGE_VARS.values() if var not in headers
            ]
            status_logger.info('Cannot process data without: {}'.format(
                ', '.join(missing_vars)))
            status_notifier.update('abort')
            continue

        for header, mapping in BINARY_CONVERSION_MAP.items():
            self.process_multiselect_vars(row, header, mapping)

        for header in COUNT_DATA_HEADERS:
            self.process_count_data(row, header)

        self.convert_rash_data(row, RASH_DATA)

        self.convert_weight_data(row, WEIGHT_CONVERSION_DATA)

        self.convert_free_text(row, FREE_TEXT_VARS, WORD_SUBS)

        self.save_row(row, index)

    status_notifier.update({'sub_progress': None})

    self.write_data(headers, self._matrix_data)

    return (bool(self._matrix_data[ADULT]),
            bool(self._matrix_data[CHILD]),
            bool(self._matrix_data[NEONATE]))
def main(*args, **kwargs):
    """
    Command-line entry point: log the options, run the worker, exit.

    The chosen options are echoed to the status log, the shutdown handler
    and a command-line notification handler are installed, and a
    ``WorkerThread`` is created and joined. The process exits with the
    worker's ``completion_status`` attribute, or 0 when it has none.

    Positional arguments are accepted but not used.
    """
    global worker

    configure_logger()

    status_logger.info('')
    status_logger.info('Starting analysis with options:')
    echoed_options = (
        ('- Input file: {}', 'input'),
        ('- Output folder: {}', 'output'),
        ('- Country: {}', 'country'),
        ('- HIV Region: {}', 'hiv'),
        ('- Malaria Region: {}', 'malaria'),
        ('- HCE variables: {}', 'hce'),
        ('- Free text variables: {}', 'freetext'),
        ('- Generate figures: {}', 'figures'),
    )
    for template, source_key in echoed_options:
        status_logger.info(template.format(kwargs[source_key]))
    status_logger.info('')

    # Note - does not work on Windows with Python 2.7, does work elsewhere.
    _init_handle_shutdown()

    status_notifier.register(CommandLineNotificationHandler())

    # (worker option name, command-line keyword) pairs; each keyword is
    # removed from ``kwargs`` as it is collected.
    option_sources = (
        ('hce', 'hce'),
        ('free_text', 'freetext'),
        ('hiv', 'hiv'),
        ('malaria', 'malaria'),
        ('figures', 'figures'),
        ('language', 'language'),
        ('legacy_format', 'legacy_format'),
    )
    options = {
        option_name: kwargs.pop(source_key)
        for option_name, source_key in option_sources
    }

    worker = workerthread.WorkerThread(
        kwargs['input'], kwargs['output'], options, kwargs['country'],
        completion_callback=_completion_handler)
    worker.join()

    exit_code = getattr(worker, 'completion_status', 0)
    sys.exit(exit_code)
def run(self):
    """
    Score the user's data against the tariff matrix and write the results.

    In order: load the tariff matrix (minus the headers dropped for the
    current options), process the validation data, score and rank the user
    data, mask ranks, predict causes, determine likelihoods, compute CSMFs
    and write the prediction, CSMF and intermediate files.

    :return: The user data returned by ``score_symptom_data``, after it has
        been passed through the ranking, prediction and likelihood steps.
    """
    super(TariffPrep, self).run()

    status_logger.info('{:s} :: Processing tariffs'.format(
        self.AGE_GROUP.capitalize()))
    status_notifier.update({'progress': 1})

    # Headers are being dropped only from tariff matrix now because of the
    # way we are iterating over the pruned tariff data. It is unnecessary
    # to drop headers from other matrices.
    drop_headers = {TARIFF_CAUSE_NUM_KEY}
    if not self.hce:
        drop_headers.update(self.data_module.HCE_DROP_LIST)
    if not self.free_text:
        drop_headers.update(self.data_module.FREE_TEXT)
    if self.short_form:
        drop_headers.update(self.data_module.SHORT_FORM_DROP_LIST)

    tariffs = get_tariff_matrix(self.tariffs_filename, drop_headers,
                                self.data_module.SPURIOUS_ASSOCIATIONS)

    self.cause_list = sorted(tariffs.keys())

    # Only the second element of the read result is used here.
    validated = self.read_input_file(self.validated_filename)[1]

    status_logger.info('{:s} :: Processing validation data.'.format(
        self.AGE_GROUP.capitalize()))
    train = self.process_training_data(validated, tariffs,
                                       self.data_module.FREQUENCIES,
                                       self.data_module.CUTOFF_POS,
                                       [.25, .5, .75])

    # ``uniform_ranks`` is unpacked but not referenced again in this method.
    (uniform_train, uniform_scores, uniform_ranks, cutoffs,
     likelihoods) = train

    self.write_cutoffs(cutoffs)

    status_logger.info('{:s} :: Generating VA cause list.'.format(
        self.AGE_GROUP.capitalize()))
    user_data = self.read_input_file(self.input_file_path())[1]
    user_data = self.score_symptom_data(user_data, tariffs)

    status_logger.info('{:s} :: Generating cause rankings.'.format(
        self.AGE_GROUP.capitalize()))
    self.generate_cause_rankings(user_data, uniform_scores)

    self.write_intermediate_file(user_data, 'external-ranks', 'ranks')

    # Half a position past the length of the uniform training data.
    lowest_rank = len(uniform_train) + 0.5

    self.mask_ranks(user_data, len(uniform_train), cutoffs,
                    self.data_module.CAUSE_CONDITIONS, lowest_rank,
                    self.data_module.UNIFORM_LIST_POS,
                    self.data_module.MIN_CAUSE_SCORE)

    self.predict(user_data, lowest_rank,
                 self.data_module.CAUSE_REDUCTION,
                 self.data_module.CAUSES, self.data_module.CAUSES46)

    self.determine_likelihood(user_data, likelihoods,
                              self.data_module.CAUSE_REDUCTION)

    undetermined_weights = self._get_undetermined_matrix()
    csmf, csmf_by_sex = self.calculate_csmf(user_data,
                                            undetermined_weights)

    self.write_predictions(user_data)

    likelihood_names = [
        'Very Likely', 'Likely', 'Somewhat Likely', 'Possible'
    ]
    if self.language != 'english':
        # Translations are read from ``<basedir>/data/<language>.json``.
        path = os.path.join(config.basedir, 'data',
                            '{}.json'.format(self.language))
        with open(path, 'rb') as f:
            translation = json.load(f)
        # NOTE(review): ``.get`` has no default, so (presumably, if this is
        # a dict) a likelihood missing from the file becomes ``None``.
        likelihood_names = [
            translation['likelihoods'].get(likelihood)
            for likelihood in likelihood_names
        ]
    else:
        translation = None
    colors = ['#3CB371', '#47d147', '#8ae600', '#e6e600']
    mp = self.write_multiple_predictions_xlsx(user_data, tariffs,
                                              likelihood_names, colors,
                                              translation)
    # The CSV writer consumes whatever the XLSX writer returned.
    self.write_multiple_predictions_csv(mp)

    self.write_csmf(self.AGE_GROUP, csmf)
    # Keys of ``csmf_by_sex`` must be 1 or 2; anything else raises KeyError.
    sex_name = {1: 'male', 2: 'female'}
    for sex, csmf_data in csmf_by_sex.items():
        key = '-'.join([self.AGE_GROUP, sex_name[sex]])
        self.write_csmf(key, csmf_data)

    self.write_intermediate_file(user_data, 'tariff-scores', 'scores')

    self.write_intermediate_file(user_data, 'tariff-ranks', 'ranks')

    return user_data
def run(self):
    """
    Run the whole analysis and report the outcome through ``_complete``.

    Steps, in order:

    1. Create the intermediate directory and write the cleaned-header copy
       of the input file. If that raises ``StopIteration`` the run is
       completed as FAIL with a "does not contain data" message.
    2. Log the analysis parameters and detect which questionnaire form the
       file uses.
    3. Build every processing step and register most of them on
       ``self._abort_list``.
    4. Run the steps: WHO mapping (WHO files only), common prep, then the
       pre-symptom / rules / symptom / tariff chain for each age group that
       common prep reported data for, then the graphs (when requested and
       any data exists), then output organisation.

    Completion is ABORT on ``AbortException``, FAIL on any other exception
    (its traceback is printed), and DONE otherwise.
    """
    status_logger.info('Preparing variable headers.')
    # NOTE(review): presumably 15 is the total number of progress steps -
    # confirm against the notifier.
    status_notifier.update({'progress': (0, 15), 'sub_progress': None})

    intermediate_dir = intermediate_dir_path(self.output_dir_path)
    figures_dir = os.path.join(self.output_dir_path, 'figures')

    # Same path as ``intermediate_dir``; it is recomputed for this call.
    self.make_dir(intermediate_dir_path(self.output_dir_path))

    try:
        self.format_headers(self.input_file_path,
                            os.path.join(intermediate_dir,
                                         CLEAN_HEADERS_FILENAME))
    except StopIteration:
        # File doesn't contain data
        message = 'Source file "{}" does not contain data.'.format(
            self.input_file_path)
        self._complete(CompletionStatus.FAIL, message)
        warning_logger.warning(message)
        return

    report_logger.info('Analysis parameters:')
    report_logger.info('- Input file: {}'.format(self.input_file_path))
    report_logger.info('- Output folder: {}'.format(self.output_dir_path))
    report_logger.info('- Country: {}'.format(self.country))
    report_logger.info('- HIV Region: {}'.format(
        self.options.get('hiv', True)))
    report_logger.info('- Malaria Region: {}'.format(
        self.options.get('malaria', True)))
    report_logger.info('')

    file_path = os.path.join(intermediate_dir, CLEAN_HEADERS_FILENAME)
    who_questionnaire = self.who_questionaire_test(file_path)

    if who_questionnaire:
        # WHO files are processed with the short-form settings.
        self.short_form = True
        form_name = 'WHO 2016 Questionnaire'
    else:
        self.short_form = self.short_form_test(file_path)
        warning_logger.debug('Detected {} form'.format(
            'short' if self.short_form else 'standard'))
        if self.short_form:
            form_name = 'PHMRC Shortened Questionnaire'
        else:
            form_name = 'PHMRC Full Questionnaire'
    report_logger.info('Detected {}'.format(form_name))

    # Build every step up front so they can all be registered for abort
    # before any of them starts running.
    who_prep = WHOPrep(self.output_dir_path)
    common_prep = CommonPrep(self.output_dir_path, self.short_form)
    adult_pre_symptom = PreSymptomPrep(adult_pre_symptom_data,
                                       self.output_dir_path,
                                       self.short_form)
    adult_rules = RulesPrep(self.output_dir_path, self.short_form,
                            common_data.ADULT, ADULT_RULES)
    adult_symptom = SymptomPrep(adult_symptom_data, self.output_dir_path,
                                self.short_form)
    adult_results = TariffPrep(adult_tariff_data, self.output_dir_path,
                               self.short_form, self.options,
                               self.country)
    child_pre_symptom = PreSymptomPrep(child_pre_symptom_data,
                                       self.output_dir_path,
                                       self.short_form)
    child_rules = RulesPrep(self.output_dir_path, self.short_form,
                            common_data.CHILD, CHILD_RULES)
    child_symptom = SymptomPrep(child_symptom_data, self.output_dir_path,
                                self.short_form)
    child_results = TariffPrep(child_tariff_data, self.output_dir_path,
                               self.short_form, self.options,
                               self.country)
    neonate_pre_symptom = PreSymptomPrep(neonate_pre_symptom_data,
                                         self.output_dir_path,
                                         self.short_form)
    neonate_rules = RulesPrep(self.output_dir_path, self.short_form,
                              common_data.NEONATE, NEONATE_RULES)
    neonate_symptom = SymptomPrep(neonate_symptom_data,
                                  self.output_dir_path, self.short_form)
    neonate_results = TariffPrep(neonate_tariff_data,
                                 self.output_dir_path, self.short_form,
                                 self.options, self.country)
    # With ``legacy_format`` set the output is not reorganised and the
    # original files are kept.
    legacy = self.options.get('legacy_format', False)
    output = OutputPrep(self.output_dir_path, reorganize=not legacy,
                        keep_orig=legacy, short_form=self.short_form,
                        free_text=self.options.get('free_text', True),
                        hce=self.options.get('hce', True))
    cause_grapher = CauseGrapher(self.output_dir_path)
    csmf_grapher = CSMFGrapher(self.output_dir_path)

    # ``output`` is not part of this list.
    self._abort_list.extend([
        who_prep,
        common_prep,
        adult_pre_symptom,
        adult_rules,
        adult_symptom,
        adult_results,
        child_pre_symptom,
        child_rules,
        child_symptom,
        child_results,
        neonate_pre_symptom,
        neonate_rules,
        neonate_symptom,
        neonate_results,
        cause_grapher,
        csmf_grapher,
    ])

    try:
        if who_questionnaire:
            who_prep.run()

        # makes adult-prepped.csv, child-prepped.csv, neonate-prepped.csv
        adult_data, child_data, neonate_data = common_prep.run()

        if adult_data:
            # makes adult-presymptom.csv
            adult_pre_symptom.run()
            # makes adult-logic-rules.csv
            adult_rules.run()
            # makes adult-symptom.csv
            adult_symptom.run()
            # creates adult output files
            adult_results.run()

        if child_data:
            # makes child-presymptom.csv
            child_pre_symptom.run()
            # makes child-logic-rules.csv
            child_rules.run()
            # makes child-symptom.csv
            child_symptom.run()
            # creates child output files
            child_results.run()

        if neonate_data:
            # makes neonate-presymptom.csv
            neonate_pre_symptom.run()
            # makes neonate-logic-rules.csv
            neonate_rules.run()
            # makes neonate-symptom.csv
            neonate_symptom.run()
            # creates neonate output files
            neonate_results.run()

        # Figures are produced only on request and only when at least one
        # age group had data.
        if self.options.get('figures') and (adult_data or child_data or
                                            neonate_data):
            self.make_dir(figures_dir)
            # generate all cause graphs
            cause_grapher.run()
            # generate all csmf graphs
            csmf_grapher.run()

        output.run()

    except AbortException:
        self._complete(CompletionStatus.ABORT)
    except Exception:
        # The traceback is printed; no message is passed to ``_complete``.
        traceback.print_exc()
        self._complete(CompletionStatus.FAIL)
    else:
        self._complete(CompletionStatus.DONE)
def run(self):
    """
    Convert pre-symptom rows into symptom data and write them out.

    Headers are extended with the generated variables, renamed through
    ``VAR_CONVERSION_MAP``, then reduced to those matching ``KEEP_PATTERN``
    and absent from ``DROP_LIST``. Every row then goes through the renaming,
    copying, cutoff, injury, binary-conversion, censoring and
    required-symptom steps in order.

    :return: The processed matrix, which is also written to
        ``self.output_file_path()``.
    """
    super(SymptomPrep, self).run()

    status_logger.info('{} :: Processing symptom data'.format(
        self.AGE_GROUP.capitalize()))
    status_notifier.update({'progress': 1})

    headers, matrix = DataPrep.read_input_file(self.input_file_path())

    status_notifier.update({'sub_progress': (0, len(matrix))})

    additional_data = {}
    additional_data.update(self.data_module.GENERATED_VARS_DATA)
    additional_headers, additional_values = additional_headers_and_values(
        headers, additional_data.items())

    headers.extend(additional_headers)
    self.rename_headers(headers, self.data_module.VAR_CONVERSION_MAP)

    # Filter in one pass. Testing the pattern per header gives the same
    # result as building a keep list and then scanning it for each header,
    # without the quadratic list membership test.
    keep_pattern = self.data_module.KEEP_PATTERN
    drop_list = self.data_module.DROP_LIST
    kept_headers = [
        header for header in headers
        if re.match(keep_pattern, header) and header not in drop_list
    ]
    # 'sid' first, then headers whose second character is not a digit, then
    # the rest, each group alphabetically. ``t[1:2]`` equals ``t[1]`` for
    # longer names but does not raise on a one-character header.
    headers = sorted(
        kept_headers,
        key=lambda t: (t != 'sid', t[1:2].isdigit(), t))

    for index, row in enumerate(matrix):
        self.check_abort()
        status_notifier.update({'sub_progress': (index, )})

        self.expand_row(row, dict(zip(additional_headers,
                                      additional_values)))
        self.rename_vars(row, self.data_module.VAR_CONVERSION_MAP)

        self.copy_variables(row, self.data_module.COPY_VARS)

        # Compute age quartiles.
        self.process_progressive_value_data(
            row, self.data_module.AGE_QUARTILE_BINARY_VARS.items())

        self.process_cutoff_data(
            row, self.data_module.DURATION_CUTOFF_DATA.items())

        self.process_injury_data(row, self.data_module.INJURY_VARS.items())

        # Dichotomize!
        self.process_binary_vars(
            row, self.data_module.BINARY_CONVERSION_MAP.items())

        # Ensure all binary variables actually ARE 0 or 1:
        self.post_process_binary_variables(row,
                                           self.data_module.BINARY_VARS)

        self.censor_causes(row, self.data_module.CENSORED_MAP)

        self.require_symptoms(row, self.data_module.REQUIRED_MAP)

    status_notifier.update({'sub_progress': None})

    DataPrep.write_output_file(headers, matrix, self.output_file_path())

    return matrix
def run(self):
    """
    Clean and recode the prepped rows into pre-symptom data.

    Headers are extended with the duration, generated and free-text word
    variables, renamed through ``VAR_CONVERSION_MAP``, then reduced to those
    matching ``KEEP_PATTERN`` that are not the ``a``/``b`` components of an
    active duration variable. Every row then goes through the validation,
    recoding, duration, weight, date, age, free-text and rash steps in
    order, and finally has missing data filled from ``self.default_fill``.

    :return: The processed matrix, which is also written to
        ``self.output_file_path()``.
    """
    super(PreSymptomPrep, self).run()

    status_logger.info('{} :: Processing pre-symptom data'.format(
        self.AGE_GROUP.capitalize()))
    status_notifier.update({'progress': 1})

    # Create a list of duration variables, dropping specified variables if
    # using the short form.
    duration_vars = self.data_module.DURATION_VARS[:]
    if self.short_form:
        for var in self.data_module.DURATION_VARS_SHORT_FORM_DROP_LIST:
            duration_vars.remove(var)

    headers, matrix = DataPrep.read_input_file(self.input_file_path())

    status_notifier.update({'sub_progress': (0, len(matrix))})

    # Identify new headers and data to be included.
    additional_data = {k: '' for k in self.data_module.DURATION_VARS}
    duration_day_vars = getattr(self.data_module, 'DURATION_DAYS_VARS', [])
    additional_data.update({k: '' for k in duration_day_vars})
    additional_data.update(
        {k: 0 for k in self.data_module.GENERATED_VARS_DATA})
    additional_data.update(
        {k: 0 for k in sorted(self.data_module.WORDS_TO_VARS.values())})
    additional_headers, additional_values = additional_headers_and_values(
        headers, additional_data.items())

    headers.extend(additional_headers)
    self.rename_headers(headers, self.data_module.VAR_CONVERSION_MAP)

    # The 'a' and 'b' components of each active duration variable are
    # dropped. A set keeps the membership test below constant-time.
    drop_set = set('{}a'.format(header) for header in duration_vars)
    drop_set.update('{}b'.format(header) for header in duration_vars)

    # Filter in one pass. Testing the pattern per header gives the same
    # result as building a keep list and then scanning it for each header,
    # without the quadratic list membership test.
    keep_pattern = self.data_module.KEEP_PATTERN
    kept_headers = [
        header for header in headers
        if re.match(keep_pattern, header) and header not in drop_set
    ]

    # Prune headers and sort by 'sid', then anything that doesn't contain a
    # digit at pos 1, then general vars. ``t[1:2]`` equals ``t[1]`` for
    # longer names but does not raise on a one-character header.
    headers = sorted(
        kept_headers,
        key=lambda t: (t != 'sid', t[1:2].isdigit(),
                       not t.startswith('g'), t))

    for index, row in enumerate(matrix):
        self.check_abort()
        status_notifier.update({'sub_progress': (index, )})

        self.expand_row(row, dict(zip(additional_headers,
                                      additional_values)))
        self.rename_vars(row, self.data_module.VAR_CONVERSION_MAP)

        self.verify_answers_for_row(row, RANGE_LIST)

        self.fix_agedays(row)

        self.calculate_age_at_death_value(row)

        self.recode_answers(row, self.data_module.RECODE_MAP)

        self.process_binary_vars(
            row, self.data_module.BINARY_CONVERSION_MAP.items())

        self.calculate_duration_vars(
            row, duration_vars,
            self.data_module.DURATION_VARS_SPECIAL_CASE)

        self.validate_days_vars(row, duration_day_vars)

        self.validate_weight_vars(row, self.data_module.WEIGHT_VARS)

        self.validate_date_vars(row, self.data_module.DATE_VARS)

        self.process_age_vars(row)

        self.convert_free_text_vars(row, self.data_module.FREE_TEXT_VARS,
                                    self.data_module.WORDS_TO_VARS)

        if self.short_form:
            # Collect the words whose conversion column holds 1 in this row.
            word_list = [
                v for k, v in
                self.data_module.SHORT_FORM_FREE_TEXT_CONVERSION.items()
                if value_or_default(row.get(k)) == 1
            ]
            if word_list:
                self.convert_free_text_words(
                    row, word_list, self.data_module.WORDS_TO_VARS)

        self.fix_rash_length(row)

        self.fix_rash_location(row)

        self.process_weight_sd_vars(
            row, getattr(self.data_module, 'EXAM_DATE_VARS', {}),
            getattr(self.data_module, 'WEIGHT_SD_DATA', {}))

        self.fill_missing_data(row, self.default_fill)

    status_notifier.update({'sub_progress': None})

    DataPrep.write_output_file(headers, matrix, self.output_file_path())

    return matrix