# NOTE(review): the statements below sit on one physical line; their line
# breaks and indentation are missing in this view, so the code itself is
# left untouched and only described here.
#
# Statements before the `if __name__ == "__main__":` guard are the end of a
# body whose header is above this view -- presumably run_scorer(sc, sqc, conf),
# the function the guard calls; confirm against the full file.
#   * current_dt is the current local time rendered as "%Y-%m-%dT%H-%M".
#   * selectExpr places two constant string columns, model_name and
#     current_dt, in front of every existing score_df column ('*').
#     The values are spliced into the expression between single quotes, so a
#     model_name containing a quote would break the expression.
#   * The row count of score_df is printed, score_df is handed to
#     spark_utils.write together with conf['target'], and finally the seconds
#     elapsed since `start` are printed. `start` and `model_name` are
#     assigned outside this view.
#
# The guarded part is the command-line entry point:
#   * --conf (required) is the path of the configuration file; it is parsed
#     with resolve=False.
#   * Arguments argparse does not recognise are joined with ',' and parsed as
#     a second configuration via ConfigFactory.parse_string.
#   * conf = over_conf.with_fallback(file_conf) -- presumably command-line
#     values take precedence and the file fills in the rest; confirm against
#     the pyhocon documentation.
#   * init_session is called with conf['spark'], the config file's base name
#     as `app`, and return_context=True; its result is unpacked into
#     (sc, sqc) and passed to run_scorer along with conf.
current_dt = time.strftime("%Y-%m-%dT%H-%M") score_df = score_df.selectExpr("'{}' as model_name".format(model_name), "'{}' as current_dt".format(current_dt), '*') print('scores generated: {}'.format(score_df.count())) print('{} saving scores ...'.format(time.strftime("%Y-%m-%d %H:%M:%S"))) spark_utils.write(conf['target'], score_df) print('execution time: {} sec'.format(time.time() - start)) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--conf', required=True) args, overrides = parser.parse_known_args() file_conf = ConfigFactory.parse_file(args.conf, resolve=False) overrides = ','.join(overrides) over_conf = ConfigFactory.parse_string(overrides) conf = over_conf.with_fallback(file_conf) sc, sqc = spark_utils.init_session(conf['spark'], app=os.path.basename(args.conf), return_context=True) run_scorer(sc, sqc, conf)
from pyhocon import ConfigFactory
import sparktools.core as spark_utils

# Flat script: builds a data frame from conf['source'], hands it to
# spark_utils.write with conf['target'], and reports progress, row count and
# elapsed time on stdout.
# NOTE(review): time, os and argparse are used below but are not imported in
# the visible part of the file -- presumably imported earlier; confirm.

_STAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _stamp():
    """Return the current local time rendered with ``_STAMP_FORMAT``."""
    return time.strftime(_STAMP_FORMAT)


start = time.time()
print('{tm} ------------------- {nm} started'.format(
    tm=_stamp(),
    nm=os.path.basename(__file__),
))

# --conf (required) is the configuration file path. Arguments argparse does
# not recognise are kept and parsed below as a second configuration.
parser = argparse.ArgumentParser()
parser.add_argument('--conf', required=True)
args, overrides = parser.parse_known_args()

# The file is parsed first, then the leftover arguments joined with ','.
# conf is the override configuration with the file configuration as fallback.
file_conf = ConfigFactory.parse_file(args.conf, resolve=False)
overrides = ','.join(overrides)
over_conf = ConfigFactory.parse_string(overrides)
conf = over_conf.with_fallback(file_conf)

# The session is labelled with the base name of the configuration file.
sqc = spark_utils.init_session(
    conf['spark'],
    app=os.path.basename(args.conf),
)

print('{tm} moving data...'.format(tm=_stamp()))
sdf = spark_utils.define_data_frame(conf['source'], sqc)
spark_utils.write(conf['target'], sdf)
# The row count is taken only after the write call has returned.
print('data set size: {sz}'.format(sz=sdf.count()))

print('{tm} download is finished'.format(tm=_stamp()))
print('execution time: {} sec'.format(time.time() - start))