def get_all_datasets():
    """
    Get all datasets by scanning the output folder. Folders whose name starts
    with DATASET_PREFIX were generated by the REST API and are returned.
    If the configuration lookup by title fails, that dataset is skipped.

    :return: the REST-API-generated datasets, newest first
    """
    datasets_folders = [f for f in listdir(GENERATED_DATASETS_DIR)
                        if isdir(join(GENERATED_DATASETS_DIR, f))]
    datasets = []
    for dataset_name in datasets_folders:
        if not dataset_name.startswith(DATASET_PREFIX):
            # not generated by the REST API
            continue
        dataset_parts = dataset_name.split('.')
        # Explicit conditional expressions instead of the fragile
        # "cond and a or b" idiom, which falls through to the fallback
        # string whenever the real value is falsy (e.g. an empty segment).
        name = dataset_parts[1] if len(dataset_parts) > 1 else 'ERROR TO GET NAME'
        output_format = dataset_parts[3] if len(dataset_parts) > 3 else 'CCDA'
        try:
            configurations = dataset_configuration_service.get_configuration_by_title(name)
            datasets.append({
                'title': name,
                # folder mtime is used as the completion timestamp
                'completedOn': datetime.fromtimestamp(
                    stat(join(GENERATED_DATASETS_DIR, dataset_name)).st_mtime).isoformat(),
                'configuration': configurations,
                'outputFormat': output_format,
                'datasetName': dataset_name
            })
        except Exception as e:
            # if the configuration lookup fails, skip this dataset;
            # no need to raise here
            logger.error(e)
    # sort() handles an empty list fine, so no length guard is needed
    datasets.sort(key=lambda k: k['completedOn'], reverse=True)
    return datasets
def get_morbidities_from_war_code(war_code, include_percentage=False):
    """
    Get morbidities by war code. If the result is already cached, return
    the cached result. Raise EntityNotFoundError if the datasource file
    cannot be read.

    :param war_code: the war code
    :param include_percentage: True if the percentage of morbidity should be
        added to the output, False otherwise
    :return: the list with morbidities data
    """
    global morbidity_map
    cached = morbidity_map.get(war_code)
    if cached is not None:
        # return cached data (single lookup instead of get() twice)
        return cached
    file_path = DATASOURCES_DIR + '/morbidity_' + war_code + '.csv'
    try:
        # newline='' is the documented way to open files for the csv module;
        # the legacy 'rU' mode was removed in Python 3.11 and would raise
        with open(file_path, newline='') as csv_file:
            morbidity_raw_list = csv.reader(csv_file, delimiter=',', quotechar='"')
            morbidity_list = []
            first = True
            for row in morbidity_raw_list:
                if first:
                    # skip first row with header
                    first = False
                    continue
                item = {'name': row[0], 'icd10Code': row[1]}
                if include_percentage:
                    item['percentOfProbabilityToAcquireDiagnosis'] = float(row[2])
                morbidity_list.append(item)
        # cache only a fully parsed list
        morbidity_map[war_code] = morbidity_list
        return morbidity_list
    except Exception as e:
        logger.error(e)
        raise EntityNotFoundError('Could not open {0}. Error: {1}'.format(file_path, e))
def get_wars_from_file():
    """
    Get war eras from file. Return the cached result if data is already
    cached.

    :return: the war eras, or None if the file could not be read
    """
    global military_eras
    if military_eras is not None:
        # return cached data
        return military_eras
    wars_data_path = DATASOURCES_DIR + '/military_eras.csv'
    try:
        # load the military_eras datasource; newline='' per csv docs,
        # and the legacy 'rU' mode was removed in Python 3.11
        with open(wars_data_path, newline='') as csv_file:
            military_eras_list = csv.reader(csv_file, delimiter=',', quotechar='"')
            # build into a local first so a mid-parse failure does not
            # leave a partially populated list cached in the global
            eras = []
            for row in military_eras_list:
                if row[0] == 'war_code':
                    # skip first element (titles row)
                    continue
                eras.append({
                    "war_code": row[0],
                    "war_name": row[1],
                    "percentage": float(row[2]),
                    "start_date": str_to_datetime(row[3]),
                    "end_date": str_to_datetime(row[4])
                })
        military_eras = eras
        return military_eras
    except Exception as e:
        logger.error('Could not open {0}. Error: {1}'.format(wars_data_path, e))
def get_all_datasets():
    """
    Get all datasets from the cache (dataset manager). Datasets whose
    configuration lookup fails are skipped.

    :return: the REST-API-generated datasets, newest first
    """
    datasets = []
    global dataset_manager
    keys = dataset_manager.get_all_keys()
    for dataset_name in keys:
        dataset = dataset_manager.get_by_name(dataset_name)
        if dataset is None:
            continue
        dataset_parts = dataset_name.split('.')
        # Explicit conditional expression instead of the fragile
        # "cond and a or b" idiom, which falls through to the error
        # string whenever the real segment is falsy (empty).
        name = dataset_parts[1] if len(dataset_parts) > 1 else 'ERROR TO GET NAME'
        try:
            configurations = dataset_configuration_service.get_configuration_by_title(name)
            dataset['configuration'] = configurations
            datasets.append(dataset)
        except Exception as e:
            # if the configuration lookup fails, skip this dataset;
            # no need to raise here
            logger.error(e)
    # sort() handles an empty list fine, so no length guard is needed
    datasets.sort(key=lambda k: k['completedOn'], reverse=True)
    return datasets
def load_datasources():
    """
    Load datasource.json, military eras and ICD-10 code/name pairs;
    terminate the process if the datasource is missing or empty.
    """
    global data_source
    global military_eras

    # datasource.json holds the generation rules for patient fields
    data_source_file = DATASOURCES_DIR + '/datasource.json'
    try:
        with open(data_source_file) as data_file:
            # OrderedDict hook keeps the field order from the JSON file
            data_source = json.load(data_file, object_pairs_hook=OrderedDict)
    except Exception as e:
        logger.error('Could not open {0}. Error: {1}'.format(
            data_source_file, e))

    # without a datasource nothing else can work — bail out hard
    if data_source is None or len(data_source) == 0:
        logger.error('Datasource not defined. Cannot continue.')
        exit(1)

    # load all war eras
    military_eras = get_wars_from_file()
    # load ICD-10 code/name pairs
    load_icd10_codes()
def preload_datasets():
    """
    Preload all datasets into the dataset manager.

    Scans the output folder; folders whose name starts with DATASET_PREFIX
    were generated by the REST API. If the configuration lookup by title
    fails, that dataset is skipped.

    :return: None
    """
    datasets_folders = [f for f in listdir(GENERATED_DATASETS_DIR)
                        if isdir(join(GENERATED_DATASETS_DIR, f))]
    for dataset_name in datasets_folders:
        if not dataset_name.startswith(DATASET_PREFIX):
            # not generated by the REST API
            continue
        dataset_parts = dataset_name.split('.')
        # Explicit conditional expressions instead of the fragile
        # "cond and a or b" idiom, which falls through to the fallback
        # string whenever the real segment is falsy (empty).
        name = dataset_parts[1] if len(dataset_parts) > 1 else 'ERROR TO GET NAME'
        output_format = dataset_parts[3] if len(dataset_parts) > 3 else 'CCDA'
        try:
            configurations = dataset_configuration_service.get_configuration_by_title(name)
            dataset = {
                'title': name,
                # folder mtime is used as the completion timestamp
                'completedOn': datetime.fromtimestamp(
                    stat(join(GENERATED_DATASETS_DIR, dataset_name)).st_mtime).isoformat(),
                'configuration': configurations,
                'status': DATASET_COMPLETED,
                'progress': 100,
                'outputFormat': output_format,
                'datasetName': dataset_name
            }
            dataset_manager.push_entity(dataset_name, dataset)
            logger.info("succeed load dataset = " + dataset_name)
        except Exception as e:
            # if the configuration lookup fails, skip this dataset;
            # no need to raise here
            logger.error(e)
def load_icd10_codes():
    """
    Read the icd10cm_codes_2018.txt file and extract code/name pairs of
    known morbidities from it. The result is saved to the global
    icd_morbidity_name_by_code variable.

    :raises Exception: re-raises any error after logging it
    :return: None
    """
    global icd_morbidity_name_by_code
    icd_morbidity_name_by_code = {}
    # load the ICD-10 datasource
    try:
        logger.info('Reading ICD-10 datasource...')
        lines_num = 0
        # use a context manager so the file handle is always closed;
        # the original opened the file in the for statement and leaked it
        with open(ICD_10_CODES_FILE_PATH) as codes_file:
            for line in codes_file:
                # fixed-width format: first 8 columns hold the code,
                # the remainder of the line is the morbidity name
                code = line[:8].strip()
                name = line[8:].strip()
                icd_morbidity_name_by_code[code] = name
                lines_num += 1
        logger.info('Loaded {0} records from ICD-10 datasource'.format(lines_num))
    except Exception as e:
        logger.error('Could not open {0}. Error: {1}'.format(ICD_10_CODES_FILE_PATH, e))
        raise
from rest.services.dataset_configuration_service import read_configuration_from_file
from rest.services.dataset_configuration_service import get_configuration_by_title
from rest.services.datasources_service import get_morbidities_from_study_profile_code
if __name__ == '__main__':
    # parse command line options (docopt reads the usage string from __doc__)
    options = docopt(__doc__, version='1.0.0')
    # check configuration file parameters and read configuration file if required
    config_path = options['-c']
    config_title = options['-t']
    config = None
    # -c (file path) and -t (title) are mutually exclusive sources of config
    if config_path and config_title:
        logger.error(
            'Both configuration file path and configuration title were specified. ' +
            'Only one of them can be provided at a time')
        exit(1)
    if config_path is not None:
        # validate the path before attempting to parse the file
        if not isfile(config_path):
            logger.error(
                "Configuration file {0} doesn't exist".format(config_path))
            exit(1)
        try:
            config = read_configuration_from_file(config_path)
        except Exception as e:
            # a malformed configuration file is fatal for the script
            logger.error(
                "Error occurred while reading configuration file: {0}".format(
                    e))
            exit(1)