def get_all_datasets():
    """
    Get all datasets by scanning the output folder. Folders whose name starts
    with DATASET_PREFIX were generated via the REST API and are returned.
    Datasets whose configuration lookup fails are skipped.
    :return: the REST-API-generated datasets, newest first (by completedOn)
    """
    datasets_folders = [f for f in listdir(GENERATED_DATASETS_DIR) if isdir(join(GENERATED_DATASETS_DIR, f))]
    datasets = []
    for dataset_name in datasets_folders:
        if not dataset_name.startswith(DATASET_PREFIX):  # not generated by the REST API
            continue
        dataset_parts = dataset_name.split('.')
        # NOTE: like the original `len(x) > 1 and x[1] or default` idiom, an
        # *empty* part also falls back to the default value.
        name = dataset_parts[1] if len(dataset_parts) > 1 and dataset_parts[1] else 'ERROR TO GET NAME'
        output_format = dataset_parts[3] if len(dataset_parts) > 3 and dataset_parts[3] else 'CCDA'
        try:
            configurations = dataset_configuration_service.get_configuration_by_title(name)
            datasets.append({
                'title': name,
                'completedOn': datetime.fromtimestamp(
                    stat(join(GENERATED_DATASETS_DIR, dataset_name)).st_mtime).isoformat(),
                'configuration': configurations,
                'outputFormat': output_format,
                'datasetName': dataset_name
            })
        except Exception as e:
            # if configuration lookup fails, skip this dataset instead of raising
            logger.error(e)
    # sort once, after the loop (the original re-sorted on every iteration)
    datasets.sort(key=lambda k: k['completedOn'], reverse=True)
    return datasets
def get_morbidities_from_war_code(war_code, include_percentage=False):
    """
    Get morbidities by war code. If the result is already cached, the cached
    result is returned without touching the filesystem.
    :param war_code: the war code (used to build the csv file name)
    :param include_percentage: True if the percentage of morbidity should be added to the output
    :return: the list with morbidities data
    :raises EntityNotFoundError: if the datasource file cannot be opened or parsed
    """
    global morbidity_map

    cached = morbidity_map.get(war_code)
    if cached is not None:  # return cached data (single lookup instead of two)
        return cached

    file_path = DATASOURCES_DIR + '/morbidity_' + war_code + '.csv'
    try:
        # 'rU' mode was removed in Python 3.11; newline='' is the mode the
        # csv module documentation recommends for reader input files
        with open(file_path, newline='') as csv_file:
            morbidity_raw_list = csv.reader(csv_file, delimiter=',', quotechar='"')
            morbidity_list = []
            first = True
            for row in morbidity_raw_list:
                if first:  # skip first row with header
                    first = False
                    continue
                item = {'name': row[0], 'icd10Code': row[1]}
                if include_percentage:
                    item['percentOfProbabilityToAcquireDiagnosis'] = float(row[2])
                morbidity_list.append(item)
            morbidity_map[war_code] = morbidity_list
            return morbidity_list
    except Exception as e:
        logger.error(e)
        raise EntityNotFoundError('Could not open {0}. Error: {1}'.format(file_path, e))
def get_wars_from_file():
    """
    Get war eras from the military_eras.csv datasource. Returns the cached
    result if the data was already loaded.
    :return: the list of war-era dicts, or None if the file could not be read
    """
    global military_eras
    if military_eras is not None:
        # return cached data
        return military_eras

    wars_data_path = DATASOURCES_DIR + '/military_eras.csv'
    try:
        # 'rU' mode was removed in Python 3.11; newline='' is the mode the
        # csv module documentation recommends for reader input files
        with open(wars_data_path, newline='') as csv_file:
            military_eras_list = csv.reader(csv_file, delimiter=',', quotechar='"')
            military_eras = []
            for row in military_eras_list:
                if row[0] == 'war_code':  # skip the header (titles) row
                    continue

                military_eras.append({"war_code": row[0],
                                      "war_name": row[1],
                                      "percentage": float(row[2]),
                                      "start_date": str_to_datetime(row[3]),
                                      "end_date": str_to_datetime(row[4])})
            return military_eras
    except Exception as e:
        # on failure the error is logged and the function falls through,
        # implicitly returning None (callers must handle a None result)
        logger.error('Could not open {0}. Error: {1}'.format(wars_data_path, e))
def get_all_datasets():
    """
    Get all datasets from the dataset-manager cache.
    Datasets whose configuration lookup fails are skipped.
    :return: the REST-API-generated datasets, newest first (by completedOn)
    """
    datasets = []
    global dataset_manager
    keys = dataset_manager.get_all_keys()
    for dataset_name in keys:
        dataset = dataset_manager.get_by_name(dataset_name)
        if dataset is None:
            continue
        dataset_parts = dataset_name.split('.')
        # NOTE: like the original `len(x) > 1 and x[1] or default` idiom, an
        # *empty* second part also falls back to the default value.
        name = dataset_parts[1] if len(dataset_parts) > 1 and dataset_parts[1] else 'ERROR TO GET NAME'
        try:
            configurations = dataset_configuration_service.get_configuration_by_title(name)
            dataset['configuration'] = configurations
            datasets.append(dataset)
        except Exception as e:
            # if configuration lookup fails, skip this dataset instead of raising
            logger.error(e)
    # sort once, after the loop (the original re-sorted on every iteration)
    datasets.sort(key=lambda k: k['completedOn'], reverse=True)
    return datasets
# Beispiel #5 (extraction artifact — commented out so the file parses)
def load_datasources():
    """
    Load datasource.json, military eras and ICD-10 code/name pairs.
    Terminates the process if the patient-field datasource cannot be loaded.
    """
    global data_source
    global military_eras

    # generation rules for patient fields live in datasource.json;
    # OrderedDict preserves the field order defined in the file
    data_source_file = DATASOURCES_DIR + '/datasource.json'
    try:
        with open(data_source_file) as data_file:
            data_source = json.load(data_file, object_pairs_hook=OrderedDict)
    except Exception as e:
        logger.error('Could not open {0}. Error: {1}'.format(
            data_source_file, e))

    # without the generation rules nothing else can run
    if data_source is None or len(data_source) == 0:
        logger.error('Datasource not defined. Cannot continue.')
        exit(1)

    # war eras datasource
    military_eras = get_wars_from_file()

    # ICD-10 code/name pairs
    load_icd10_codes()
def preload_datasets():
    """
    Preload all datasets into the dataset manager.
    Scans the output folder; folders whose name starts with DATASET_PREFIX
    were generated via the REST API and are loaded. Datasets whose
    configuration lookup fails are skipped.
    :return: None
    """
    datasets_folders = [
        f for f in listdir(GENERATED_DATASETS_DIR)
        if isdir(join(GENERATED_DATASETS_DIR, f))
    ]
    for dataset_name in datasets_folders:
        if not dataset_name.startswith(DATASET_PREFIX):
            # not generated by the REST API
            continue
        dataset_parts = dataset_name.split('.')
        # NOTE: like the original `len(x) > 1 and x[1] or default` idiom, an
        # *empty* part also falls back to the default value.
        name = dataset_parts[1] if len(dataset_parts) > 1 and dataset_parts[1] else 'ERROR TO GET NAME'
        output_format = dataset_parts[3] if len(dataset_parts) > 3 and dataset_parts[3] else 'CCDA'
        try:
            configurations = dataset_configuration_service.get_configuration_by_title(name)
            dataset = {
                'title': name,
                'completedOn': datetime.fromtimestamp(
                    stat(join(GENERATED_DATASETS_DIR, dataset_name)).st_mtime).isoformat(),
                'configuration': configurations,
                'status': DATASET_COMPLETED,  # folder exists => generation finished
                'progress': 100,
                'outputFormat': output_format,
                'datasetName': dataset_name
            }
            dataset_manager.push_entity(dataset_name, dataset)
            logger.info("succeed load dataset = " + dataset_name)
        except Exception as e:
            # if configuration lookup fails, skip this dataset instead of raising
            logger.error(e)
# Beispiel #7 (extraction artifact — commented out so the file parses)
def load_icd10_codes():
    """
    Read the icd10cm_codes_2018.txt file and extract code/name pairs of known
    morbidities from it. The result is saved to the global
    icd_morbidity_name_by_code variable.
    :return: None
    :raises Exception: re-raises any error after logging it
    """
    global icd_morbidity_name_by_code
    icd_morbidity_name_by_code = {}

    # load the ICD-10 datasource
    try:
        logger.info('Reading ICD-10 datasource...')
        lines_num = 0
        # close the file deterministically (the original leaked the handle)
        with open(ICD_10_CODES_FILE_PATH) as codes_file:
            for line in codes_file:
                # fixed-width format: first 8 chars are the code, the rest is the name
                code = line[:8].strip()
                name = line[8:].strip()
                icd_morbidity_name_by_code[code] = name
                lines_num += 1
        logger.info('Loaded {0} records from ICD-10 datasource'.format(lines_num))
    except Exception as e:
        logger.error('Could not open {0}. Error: {1}'.format(ICD_10_CODES_FILE_PATH, e))
        raise
from rest.services.dataset_configuration_service import read_configuration_from_file
from rest.services.dataset_configuration_service import get_configuration_by_title
from rest.services.datasources_service import get_morbidities_from_study_profile_code

if __name__ == '__main__':
    # parse command line options
    options = docopt(__doc__, version='1.0.0')

    # check configuration file parameters and read configuration file if required
    # -c: path to a configuration file on disk; -t: title of a stored
    # configuration. The two are mutually exclusive.
    config_path = options['-c']
    config_title = options['-t']
    config = None
    if config_path and config_title:
        logger.error(
            'Both configuration file path and configuration title were specified. '
            + 'Only one of them can be provided at a time')
        exit(1)

    if config_path is not None:
        # a config file path was given: it must exist and parse cleanly,
        # otherwise the script terminates with a non-zero exit code
        if not isfile(config_path):
            logger.error(
                "Configuration file {0} doesn't exist".format(config_path))
            exit(1)
        try:
            config = read_configuration_from_file(config_path)
        except Exception as e:
            logger.error(
                "Error occurred while reading configuration file: {0}".format(
                    e))
            exit(1)