def _process_row_series(row, rule, pattern, required_attr, config):
    """
    Compute the sha256 string for one rule applied to one input row.

    .. seealso:: _process_frame

    :param row: object indexed by the ``COL_*`` column constants
    :param rule: rule identifier; not used by the code in this function
    :param pattern: format string; ``pattern.format(patient)`` yields the
        text that gets hashed
    :param required_attr: handed to ``NormalizedPatient.has_all_data`` to
        decide whether the patient can be hashed
    :param config: dictionary with run-time parameters; ``config['SALT']``
        is appended to the formatted text before hashing
    :rtype: string
    :return sha_string: result of ``utils.apply_sha256``, or ``''`` when
        ``has_all_data`` reports the patient is incomplete
    """
    # Build the keyword arguments in the same order the columns are read.
    patient_fields = {
        'patid': row[COL_PATID],
        'pat_first_name': row[COL_FIRST],
        'pat_last_name': row[COL_LAST],
        'pat_birth_date': HashGenerator.format_date(row[COL_DOB]),
        'pat_sex': row[COL_SEX],
        'pat_race': HashGenerator.format_race(row[COL_RACE]),
    }
    patient = NormalizedPatient(**patient_fields)

    if patient.has_all_data(required_attr):
        salted_text = pattern.format(patient) + config['SALT']
        return utils.apply_sha256(salted_text)

    # Nothing to hash: the patient lacks data required by this rule.
    return ''
def _process_row_series(cls, ser, rule, pattern, required_attr, config):
    """
    Compute the sha string for one rule.

    :param cls: class whose ``log`` attribute receives the debug messages
    :param ser: object indexed by the keys ``'patid'``, ``'first'``,
        ``'last'``, ``'dob'``, ``'sex'`` and ``'race'``
    :param rule: rule identifier, used only in the log messages
    :param pattern: format string; ``pattern.format(patient)`` yields the
        text that gets hashed
    :param required_attr: handed to ``NormalizedPatient.has_all_data`` to
        decide whether the patient can be hashed
    :param config: dictionary with run-time parameters; ``config['SALT']``
        is appended to the formatted text before hashing
    :rtype: string
    :return sha_string: result of ``utils.apply_sha256``, or ``''`` when
        ``has_all_data`` reports the patient is incomplete
    """
    patient = NormalizedPatient(patid=ser['patid'],
                                pat_first_name=ser['first'],
                                pat_last_name=ser['last'],
                                pat_birth_date=ser['dob'],
                                pat_sex=ser['sex'],
                                pat_race=ser['race'])

    if not patient.has_all_data(required_attr):
        # The trailing space after "data" matters: adjacent string literals
        # are joined verbatim, and without it the message read "datafor".
        cls.log.debug("Skip hashing patient [{}] due to missing data "
                      "for rule [{}]".format(patient.patid, rule))
        return ''

    raw = pattern.format(patient) + config['SALT']
    sha_string = utils.apply_sha256(raw)
    # NOTE(review): this debug line writes the un-hashed text, salt included,
    # to the log -- confirm debug logging is never enabled where that matters.
    cls.log.debug("For patient [{}] (rule {}): {}, hash_string= {}".format(
        patient.patid, rule, raw, sha_string))  # noqa
    return sha_string
def main():
    """
    Command line entry point for the generator.

    Hands the shared ``logger`` to ``HashGenerator`` and
    ``NormalizedPatient``, parses the command line, loads the config file
    named by ``--config`` and runs ``HashGenerator.generate`` on the input
    and output directories. With ``--version`` it prints the version and
    exits before any config is loaded. The outcome is logged: the run
    duration on success, an error otherwise.

    .. seealso:: :meth:`HashGenerator.generate`
    """
    for target in (HashGenerator, NormalizedPatient):
        target.configure_logger(logger)

    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--version",
                        default=False,
                        action='store_true',
                        help="Show the version number")
    parser.add_argument("-c", "--config",
                        default=DEFAULT_SETTINGS_FILE,
                        help="Application config file")
    # Both directories are optional and fall back to the current directory.
    parser.add_argument('-i', '--inputdir',
                        default='.',
                        help='input directory name')
    parser.add_argument('-o', '--outputdir',
                        default='.',
                        help='output directory name')
    args = parser.parse_args()

    if args.version:
        import sys
        print("deduper, version {}".format(__version__))
        sys.exit()

    config = Config(root_path=ROOT_PATH, defaults={})
    config.from_pyfile(args.config)

    # Time only the generate call, using a clock that cannot go backwards.
    started = time.monotonic()
    success = HashGenerator.generate(config, args.inputdir, args.outputdir)
    finished = time.monotonic()

    if not success:
        logger.error("Failed!")
        return

    duration = timedelta(seconds=(finished - started))
    logger.info("Done. Process duration: {}".format(str(duration)))
def setUp(self):
    """
    Run the parent ``setUp``, then hand the shared ``logger`` to
    ``HashGenerator`` and ``NormalizedPatient``.
    """
    super(TestHashGenerator, self).setUp()
    for target in (HashGenerator, NormalizedPatient):
        target.configure_logger(logger)