def s3_to_psv_main(args):
    mrjob = read_string('pipeline.et_step.mrjob')
    stream_name = read_string('pipeline.et_step.s3_to_s3_stream')
    DATABASE = read_string('pipeline.redshift_database')
    LOG_STREAM = PipelineStreamLogger(
        stream_name,
        args.run_local,
        mrjob,
        input_date=args.date
    )
    day_to_run = setup_dates_to_check(args.date, args.run_local, LOG_STREAM)
    try:
        if not args.run_local:
            setup_private(args.private)

        # Create a status table instance based on args: progress is tracked
        # either in DynamoDB or in Redshift itself
        if args.skip_progress_in_redshift:
            status_table = DynamoDbStatusTable(
                LOG_STREAM, run_local=args.run_local
            )
        else:
            status_table = RedshiftStatusTable(
                RedshiftPostgres(
                    LOG_STREAM, args.private, run_local=args.run_local
                )
            )

        # Run the extract-transform step over the configured S3 prefixes
        load_msg = __load_data_from_s3(
            status_table,
            read_list('pipeline.et_step.s3_prefixes'),
            day_to_run,
            mrjob,
            args.run_local,
            DATABASE,
            LOG_STREAM,
            force_et=args.force_et
        )
        LOG_STREAM.write_msg("complete", extra_msg=load_msg)
    finally:
        clear_env(args.run_local)
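
# A minimal sketch of a command-line entry point for s3_to_psv_main, assuming
# this module is meant to be run as a script. The flag names below (--date,
# --run-local, --private, --force-et, --skip-progress-in-redshift) are
# inferred from the attributes read off `args` in the function above; they
# are assumptions, not a confirmed interface.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="extract-transform data from S3 into PSV files"
    )
    parser.add_argument(
        "--date",
        help="date to process, e.g. 2014-05-15"
    )
    parser.add_argument(
        "--run-local", action="store_true",
        help="run locally instead of against the cluster"
    )
    parser.add_argument(
        "--private",
        help="path to private credentials used by setup_private"
    )
    parser.add_argument(
        "--force-et", action="store_true",
        help="re-run the ET step even if it is already marked complete"
    )
    parser.add_argument(
        "--skip-progress-in-redshift", action="store_true",
        help="track progress in DynamoDB instead of Redshift"
    )
    # argparse maps dashed flags to underscored attributes (args.run_local,
    # args.force_et, args.skip_progress_in_redshift) automatically
    s3_to_psv_main(parser.parse_args())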