Example #1
0
    def test_get_customized_file_name(self):
        """
        Verify the file names produced by :func:`get_customized_file_name`.

        Two scenarios are exercised in order: a single name with an
        extension, and a tuple of name parts joined with an explicit
        separator plus an extension.
        """
        # Each scenario: (names argument, keyword options, expected file name)
        scenarios = (
            # Only one string needs to be customized, e.g. "NAME.CSV"
            ("Test", {"extension": "CSV"}, "Test.CSV"),
            # Multiple strings need to be customized,
            # e.g. "1993_NAMCS_RAW_DATASET_FILE_NAME.CSV"
            (
                (1993, "NAMCS", "RAW", "DATASET", "FILE", "NAME"),
                {"separator": "_", "extension": "csv"},
                "1993_NAMCS_RAW_DATASET_FILE_NAME.csv",
            ),
        )

        for name_parts, options, wanted_file_name in scenarios:
            # Call to func :func:`get_customized_file_name`
            produced_file_name = get_customized_file_name(name_parts, **options)

            # Assert for valid customized file name
            self.assertEqual(wanted_file_name, produced_file_name)
Example #2
0
def download_namcs_zipfile(namcs_year, download_path=DOWNLOADED_FILES_DIR_PATH):
    """
    Retrieve the zipped NAMCS data file for `namcs_year` and store it inside
    `download_path`.

    The source URL is taken from the "url" entry of
    :func:`get_namcs_source_file_info` for `namcs_year`; the local file name
    is built by :func:`get_customized_file_name` from "NAMCS", "DATA" and
    the year, with the "zip" extension.

    Parameters:
        namcs_year (:class:`int`): Year for which the NAMCS dataset file
            will be downloaded.
        download_path (:class:`str`): Directory in which the downloaded
            zip file is stored.

    Returns:
        :class:`str`: Path of the zipped dataset file for `namcs_year`
        inside `download_path`.

    Note:
        >>> from hdx_ahcd.namcs.config import DOWNLOADED_FILES_DIR_PATH
        >>> DOWNLOADED_FILES_DIR_PATH
        "~/.hdx_ahcd/data/downloaded_files"
    """
    source_file_info = get_namcs_source_file_info(namcs_year)
    source_url = source_file_info.get("url")

    target_zip_path = os.path.join(
        download_path,
        get_customized_file_name("NAMCS", "DATA", namcs_year, extension="zip")
    )

    download_message = "Downloading file: {} for year: {}".format(
        source_url, namcs_year
    )
    log.info(download_message)

    # The retrieval runs under :func:`try_except`, which presumably handles
    # errors raised by :func:`urlretrieve` -- its exact policy is not visible
    # here. The target path is returned in either case.
    with try_except():
        request.urlretrieve(source_url, target_zip_path)

    return target_zip_path
Example #3
0
def delete_namcs_zipfile(year, download_path=DOWNLOADED_FILES_DIR_PATH):
    """
    For a given `year`, delete the downloaded zipped NAMCS data set file.

    The file name is built by :func:`get_customized_file_name` from "NAMCS",
    "DATA" and `year` with the "zip" extension, and is looked up inside
    `download_path`.

    Parameters:
        year (:class:`int`): Year for which downloaded zipped dataset file will
            be deleted.
        download_path (:class:`str`): Directory containing the downloaded
            zipped dataset file for `year`.

    Raises:
        :class:`Exception`: If the zip file for `year` does not exist in
            `download_path`.
    """
    zip_file_name = \
        get_customized_file_name("NAMCS", "DATA", year, extension="zip")
    full_file_name = os.path.join(download_path, zip_file_name)

    if not os.path.exists(full_file_name):
        # Single literal: the previous adjacent literals were concatenated
        # without a separating space ("does notexists at")
        raise Exception(
            "Zip file for year: {} does not exist at: {}".format(
                year, download_path
            )
        )

    # Removal runs under :func:`try_except`; how it treats a failing
    # `os.remove` is not visible from this function
    with try_except():
        log.debug(
            "Deleting zip file: {} for year: {}".format(full_file_name, year)
        )
        os.remove(full_file_name)
Example #4
0
def export_to_csv(year, generator_object):
    """
    Method to export the translated NAMCS patient case data into CSV file for a
    given year.

    The target file lives in `NAMCS_DATA_DIR_PATH`; its name is built from
    the normalized NAMCS file name for `year` and
    `CONVERTED_CSV_FILE_NAME_SUFFIX`, with the "csv" extension. A header row
    with `CONVERTED_CSV_FIELDS` is written first, followed by one row per
    record taken from `generator_object`.

    Parameters:
        year (:class:`int`): Year for which translated NAMCS data will be
            exported to csv.
        generator_object (:class:`generator`): Generator object containing
            translated NAMCS patient case data for `year`. It is consumed
            by this call.

    Returns:
        :class:`str`: Real (symlink-resolved) path of exported csv file. The
        path is returned after the :func:`try_except` block exits; whether
        that helper suppresses write errors is not visible here.
    """
    # Constructing source file name on the basis of year specified
    source_file_id = get_normalized_namcs_file_name(year)

    # Absolute path of file where data is exported
    translated_csv_file = os.path.join(
        NAMCS_DATA_DIR_PATH,
        get_customized_file_name(
            source_file_id, CONVERTED_CSV_FILE_NAME_SUFFIX, extension="csv"
        )
    )

    with try_except():
        # Write all the translated records into CSV file.
        # `newline=""` is required by the csv module: the writer emits its
        # own line terminators, and without it platforms that translate
        # "\n" would produce "\r\r\n" row endings.
        with open(translated_csv_file, "w", newline="") as csv_file:
            writer = csv.DictWriter(
                csv_file,
                delimiter=",",
                fieldnames=CONVERTED_CSV_FIELDS
            )
            writer.writeheader()
            for translated_record in generator_object:
                writer.writerow(translated_record)
            log.info("Finished writing to the file %s" % translated_csv_file)

    return os.path.realpath(translated_csv_file)
Example #5
0
def get_generator_by_year(year, namcs_raw_dataset_file=None):
    """
    Method to translate raw NAMCS patient case data for a given year in human
    readable form.

    Any existing error file for `year` is removed before processing. Every
    record read from the dataset file is yielded as a dict, starting with
    the source file id and the 1-based row number. A record whose
    translation raises is logged, remembered as an error, and still yielded
    with whatever fields had been translated up to that point. After the
    last record has been consumed, the collected errors (if any) are written
    to the error file in `ERROR_FILES_DIR_PATH`.

    Parameters:
        year (:class:`int`): NAMCS year for which raw NAMCS data needs to be
            translated.
        namcs_raw_dataset_file (:class:`str`): Absolute path of
            raw dataset input file. If not specified, local file path will be
            deduced on the basis of `year` specified by user.
            Note: Local (extracted) file must exists for this method to yield
                desired response; if the file does not exist, the generator
                yields nothing.

    Returns:
        :class:`generator`: Generator object containing translated
        raw NAMCS patient case data for given year.

    Raises:
        :class:`Exception`: If some of attributes/fields are not
        implemented in the class for `year`, exception is raised
        For example if :class:`Year1973` doesn't implement attribute
        `gender` an exception will be raised.
    """
    if namcs_raw_dataset_file is not None:
        dataset_file = namcs_raw_dataset_file
    else:
        dataset_file = get_namcs_dataset_path_for_year(year)

    # Constructing source file name on the basis of year specified
    source_file_id = get_normalized_namcs_file_name(year)
    # Error file name to dump the rejected data set
    error_file = os.path.join(
        ERROR_FILES_DIR_PATH,
        get_customized_file_name(source_file_id, extension="err")
    )

    # Removing existing error file to avoid confusion
    if os.path.exists(error_file):
        with try_except():
            os.remove(error_file)

    # Check if data set file exist before processing; nothing to yield
    # otherwise
    if not os.path.exists(dataset_file):
        return

    with open(dataset_file, "r") as dataset_file_handler:
        errors = []
        with try_except(TypeError, re_raise=True):
            # Get the specific year class from module years
            year_class_object = vars(years).get("Year{}".format(year))

        # Get the mappings from year class
        field_mappings = year_class_object.get_field_slice_mapping()

        for record_no, record in safe_read_file(dataset_file_handler):
            translated_record = {
                NAMCSFieldEnum.SOURCE_FILE_ID.value: source_file_id,
                NAMCSFieldEnum.SOURCE_FILE_ROW.value: record_no + 1
            }
            try:
                for field_name, slice_object in field_mappings.items():
                    # Evaluate `field_name` which is collection mappings
                    if isinstance(slice_object, (list, tuple)):
                        translated_code = process_multiple_slice_objects(
                            record, field_name, slice_object
                        )
                    else:
                        translated_code = get_field_code_from_record(
                            record, field_name, slice_object
                        )
                    translated_record[field_name] = translated_code

                # Populate all `CONVERTED_CSV_FIELDS` for `record`
                translated_record = populate_missing_fields(
                    CONVERTED_CSV_FIELDS,
                    translated_record
                )

                # Case : Removing blank `physician diagnoses` codes from
                # `translated_record`
                # Fetching `field_name` whose `translated_code` is `list`
                # in `translated_record`; the keys are collected up front
                # so the mapping is not reassigned while being iterated
                list_valued_fields = [
                    key for key in translated_record
                    if isinstance(translated_record[key], list)
                ]
                for field_name in list_valued_fields:
                    # Removing blank, empty element from `translated_code`
                    # and reassigning new value to
                    # `translated_record[field_name]`
                    translated_record[field_name] = list(
                        filter(len, translated_record[field_name])
                    )
            except Exception as exc:
                detailed_exception_info(logger=log)
                errors.append(
                    {
                        NAMCSErrorFieldEnum.RECORD_NUMBER.value:
                            record_no + 1,
                        NAMCSErrorFieldEnum.RECORD.value: record,
                        NAMCSErrorFieldEnum.EXCEPTION.value: str(exc)
                    }
                )
            # NOTE: the record is yielded even when translation failed
            yield translated_record

        # Check if any records was rejected during NAMCS data set processing
        # due to erroneous field value
        if errors:
            # TODO: Discard record or replace None value for erroneous field
            # `newline=""` is required by the csv module so that the
            # writer's own line terminators are not translated again
            with open(error_file, "w", newline="") as error_file_handler:
                # Error file headers
                error_file_headers = (
                    NAMCSErrorFieldEnum.RECORD_NUMBER.value,
                    NAMCSErrorFieldEnum.EXCEPTION.value,
                    NAMCSErrorFieldEnum.RECORD.value
                )
                writer = csv.DictWriter(
                    error_file_handler,
                    delimiter=",",
                    fieldnames=error_file_headers
                )
                writer.writeheader()
                for _error in errors:
                    writer.writerow(_error)
                log.info("Finished writing to error file {}".format(
                    error_file))
def namcs_regression_test():
    """
    Invoke method `get_cleaned_data_by_year` and, for every NAMCS year
    configured by parameter `YEARS_AVAILABLE`, append all records from that
    year's `generator` to the output file denoted by `TSV_FILE_PATH`.

    Errors are reported through `LOG` rather than raised in these cases:
    the generator for a year cannot be fetched (that year is skipped), a
    record cannot be written (processing continues with the next record),
    or an error file for the year exists in `ERROR_FILES_DIR_PATH` after
    processing. Exceptions raised by the generator itself while iterating
    are not caught here.

    On successful execution file `TSV_FILE_PATH` will contain all records
    for all NAMCS years, preceded by a `CONVERTED_CSV_FIELDS` header row.

    Note:
        This is strictly for dev  purpose, no actual test case or test suite
        are used to perform regression.
    """
    # `newline=""` is required by the csv module so that the writer's own
    # line terminators are not translated a second time
    with open(TSV_FILE_PATH, "w", newline="") as file_handle:
        tsv_writer = csv.DictWriter(
            file_handle, fieldnames=CONVERTED_CSV_FIELDS, delimiter="\t"
        )
        tsv_writer.writeheader()

        LOG.info(
            "Processing namcs data for all years: {}\n".format(YEARS_AVAILABLE)
        )

        namcs_data_all_years = get_cleaned_data_by_year()
        for year in YEARS_AVAILABLE:
            LOG.debug("Processing year: {}".format(year))
            namcs_data_year = namcs_data_all_years.get(year)
            try:
                LOG.debug("Using generator for year: {}".format(year))
                translated_data_gen_obj = namcs_data_year.get("generator")
            except Exception as exc:
                LOG.error(
                    "Error: '{}', while processing generator for year:{}, "
                    "moving to next year".format(str(exc), year)
                )
                continue
            LOG.debug("Writing data to tsv file.")

            # Initialised per year so an empty generator reports 0 records
            # instead of an unbound or stale loop variable
            record_count = 0
            for record_count, record in enumerate(
                    translated_data_gen_obj, start=1
            ):
                try:
                    tsv_writer.writerow(record)
                except Exception as exc:
                    LOG.error(
                        "Error: '{}' in writing record: [{}]\nRecord no:[{}] "
                        "\nFor year:[{}]".format(
                            str(exc), record, record_count, year
                        )
                    )
            LOG.info(
                "Total records:[{}] written for year: [{}]".format(
                    record_count, year
                )
            )

            source_file_id = get_normalized_namcs_file_name(year)
            # Error file path
            error_file = os.path.join(
                ERROR_FILES_DIR_PATH,
                get_customized_file_name(source_file_id, extension="err")
            )
            if os.path.exists(error_file):
                # Report both the path and the year; the message previously
                # labelled the path as the year
                LOG.error(
                    "Error: error file: {} for year: {} is "
                    "generated.".format(error_file, year)
                )
    LOG.info("Data for all namcs years: [{}]".format(TSV_FILE_PATH))