Beispiel #1
0
    def _process_row_series(row, rule, pattern, required_attr, config):
        """
        Produce the salted sha256 string of one input row for one rule.

        The row is wrapped in a ``NormalizedPatient``; the birth date and
        race values go through ``HashGenerator.format_date`` and
        ``HashGenerator.format_race`` first.  When the patient passes
        ``has_all_data(required_attr)``, the text hashed is
        ``pattern.format(patient) + config['SALT']``.

        .. seealso::
            ``_process_frame``

        :param row: row object indexed by the ``COL_*`` column constants
        :param rule: rule identifier (not used by the computation itself)
        :param pattern: format string rendered with the patient object
        :param required_attr: forwarded to ``patient.has_all_data``
        :param config: dictionary with run-time parameters; the ``'SALT'``
            entry is appended to the formatted pattern before hashing
        :rtype: string
        :return sha_string: value returned by ``utils.apply_sha256``, or
            ``''`` when the patient lacks data required by the rule
        """
        # Keep the lookups in this order: it mirrors the column access
        # order of the keyword arguments passed to NormalizedPatient.
        fields = {
            'patid': row[COL_PATID],
            'pat_first_name': row[COL_FIRST],
            'pat_last_name': row[COL_LAST],
            'pat_birth_date': HashGenerator.format_date(row[COL_DOB]),
            'pat_sex': row[COL_SEX],
            'pat_race': HashGenerator.format_race(row[COL_RACE]),
        }
        patient = NormalizedPatient(**fields)

        if patient.has_all_data(required_attr):
            salted = pattern.format(patient) + config['SALT']
            return utils.apply_sha256(salted)

        # Incomplete patient for this rule: signal "no hash" to the caller.
        return ''
    def _process_row_series(cls, ser, rule, pattern, required_attr, config):
        """
        Compute the sha string for one rule.

        A ``NormalizedPatient`` is built from the row; if it has all the
        data named in ``required_attr``, the string
        ``pattern.format(patient) + config['SALT']`` is passed to
        ``utils.apply_sha256``.

        :param ser: row object indexed by the keys ``'patid'``, ``'first'``,
            ``'last'``, ``'dob'``, ``'sex'`` and ``'race'``
            (presumably a pandas Series -- confirm against the caller)
        :param rule: rule identifier, used only in the log messages
        :param pattern: format string rendered with the patient object
        :param required_attr: forwarded to ``patient.has_all_data``
        :param config: dictionary with run-time parameters; must contain
            the ``'SALT'`` entry
        :rtype: string
        :return sha_string: the computed hash, or ``''`` when the patient
            is skipped because of missing data
        """
        patient = NormalizedPatient(patid=ser['patid'],
                                    pat_first_name=ser['first'],
                                    pat_last_name=ser['last'],
                                    pat_birth_date=ser['dob'],
                                    pat_sex=ser['sex'],
                                    pat_race=ser['race'])

        if not patient.has_all_data(required_attr):
            # The trailing space matters: adjacent string literals are
            # joined verbatim, so without it the message reads
            # "missing datafor rule".
            cls.log.debug("Skip hashing patient [{}] due to missing data "
                          "for rule [{}]".format(patient.patid, rule))
            return ''

        raw = pattern.format(patient) + config['SALT']
        sha_string = utils.apply_sha256(raw)
        # NOTE(review): `raw` holds the formatted patient attributes plus
        # the salt, so this debug message writes both to the log -- confirm
        # that is acceptable wherever debug logging can be enabled.
        cls.log.debug("For patient [{}] (rule {}): {}, hash_string= {}".format(
            patient.patid, rule, raw, sha_string))  # noqa

        return sha_string
Beispiel #3
0
def main():
    """
    Command-line entry point for the hash generator.

    Hands the module logger to ``HashGenerator`` and ``NormalizedPatient``,
    parses the command line, loads the config file and runs the generator
    while timing the call with a monotonic clock.

    Options:

    * ``-v`` / ``--version``: print the version number and exit
    * ``-c`` / ``--config``: application config file
      (default: ``DEFAULT_SETTINGS_FILE``)
    * ``-i`` / ``--inputdir``: input directory name (default: ``'.'``)
    * ``-o`` / ``--outputdir``: output directory name (default: ``'.'``)

    .. seealso::

        :meth:`HashGenerator.generate`

    """
    for klass in (HashGenerator, NormalizedPatient):
        klass.configure_logger(logger)

    cli = argparse.ArgumentParser()
    cli.add_argument("-v", "--version",
                     action='store_true',
                     default=False,
                     help="Show the version number")
    cli.add_argument("-c", "--config",
                     default=DEFAULT_SETTINGS_FILE,
                     help="Application config file")
    # Both directories are optional and fall back to the current directory.
    cli.add_argument('-i', '--inputdir',
                     default='.',
                     help='input directory name')
    cli.add_argument('-o', '--outputdir',
                     default='.',
                     help='output directory name')
    options = cli.parse_args()

    if options.version:
        import sys
        print("deduper, version {}".format(__version__))
        sys.exit()

    settings = Config(root_path=ROOT_PATH, defaults={})
    settings.from_pyfile(options.config)

    started_at = time.monotonic()
    succeeded = HashGenerator.generate(settings,
                                       options.inputdir,
                                       options.outputdir)
    elapsed_seconds = time.monotonic() - started_at

    if not succeeded:
        logger.error("Failed!")
        return

    duration = str(timedelta(seconds=elapsed_seconds))
    logger.info("Done. Process duration: {}".format(duration))
 def setUp(self):
     """
     Run the parent fixture setup, then pass the shared ``logger`` to
     ``HashGenerator`` and ``NormalizedPatient``.
     """
     super(TestHashGenerator, self).setUp()
     for target in (HashGenerator, NormalizedPatient):
         target.configure_logger(logger)