def dates_from_rs_status(status_helper, db, logstream, retry_on_err, single_date=None):
    """
    Collect the data dates whose load step is still waiting to run.

    Et-complete jobs are looked up through status_helper, either for the one
    requested date or for a window of recent days whose size comes from the
    'pipeline.load_step.days_to_check' setting.  The returned
    (date, load status) pairs are then walked in order:
      * a date is kept when its load status is None, or when it is 'error'
        and retry_on_err is set
      * the walk stops at the first 'error' when not retrying
      * the walk stops at the first date rejected by one_day_greater
        (presumably a gap in consecutive days -- confirm against that helper)

    Args:
    status_helper -- a wrapper around a backing store to aid in CRUD
    db -- the database that is queried
    logstream -- a PipelineStreamLogger
    retry_on_err -- a boolean, True if errored loads should be retried
    single_date -- date string of the form YYYY-MM-DD when only one date
        is wanted, otherwise None

    Returns:
    a list of dates to catch up on formatted as strings YYYY/MM/DD
    """
    table_versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())
    single_day_mode = single_date is not None

    if single_day_mode:
        data_date = get_formatted_date(single_date)
        if data_date is None:
            handle_error("bad input date: {0}".format(single_date), logstream)
        start_datetime = datetime.strptime(data_date, "%Y/%m/%d")
        status_tuples = status_helper.query_et_complete_job(
            db, table_versions, data_date
        )
    else:
        # one extra day is added on top of the configured look-back
        window_days = read_int('pipeline.load_step.days_to_check') + 1
        start_datetime = datetime.utcnow() - timedelta(days=window_days)
        status_tuples = status_helper.query_et_complete_jobs(
            db, table_versions, start_datetime
        )

    if status_tuples is False:
        reported_date = data_date if single_day_mode else start_datetime
        failure_msg = (
            "query for complete et job failed, version={0}, date={1}".format(
                table_versions, reported_date
            )
        )
        handle_error(failure_msg, logstream)

    # Seed with the day before the start so the first returned date is
    # compared against (start - 1 day).
    day_before_start = start_datetime - timedelta(days=1)
    previous_date = day_before_start.strftime("%Y/%m/%d")

    load_candidates = []
    for job_date, load_status in status_tuples:
        if not one_day_greater(job_date, previous_date):
            break
        if load_status is None:
            load_candidates.append(job_date)
        elif load_status == 'error':
            if not retry_on_err:
                break
            load_candidates.append(job_date)
        previous_date = job_date

    logstream.write_msg(
        status='running',
        extra_msg="candidates dates for load: {0}".format(load_candidates),
    )
    return load_candidates
def dates_from_rs_status(status_helper, db, logstream, retry_on_err, single_date=None):
    """
    Collect the data dates whose load step is still waiting to run.

    Et-complete jobs are looked up through status_helper, either for the one
    requested date or for a window of recent days whose size comes from the
    'pipeline.load_step.days_to_check' setting.  The returned
    (date, load status) pairs are then walked in order:
      * a date is kept when its load status is None, or when it is 'error'
        and retry_on_err is set
      * the walk stops at the first 'error' when not retrying
      * the walk stops at the first date rejected by one_day_greater
        (presumably a gap in consecutive days -- confirm against that helper)

    Args:
    status_helper -- a wrapper around a backing store to aid in CRUD
    db -- the database that is queried
    logstream -- a PipelineStreamLogger
    retry_on_err -- a boolean, True if errored loads should be retried
    single_date -- date string of the form YYYY-MM-DD when only one date
        is wanted, otherwise None

    Returns:
    a list of dates to catch up on formatted as strings YYYY/MM/DD
    """
    table_versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())
    single_day_mode = single_date is not None

    if single_day_mode:
        data_date = get_formatted_date(single_date)
        if data_date is None:
            handle_error("bad input date: {0}".format(single_date), logstream)
        start_datetime = datetime.strptime(data_date, "%Y/%m/%d")
        status_tuples = status_helper.query_et_complete_job(
            db, table_versions, data_date
        )
    else:
        # one extra day is added on top of the configured look-back
        window_days = read_int('pipeline.load_step.days_to_check') + 1
        start_datetime = datetime.utcnow() - timedelta(days=window_days)
        status_tuples = status_helper.query_et_complete_jobs(
            db, table_versions, start_datetime
        )

    if status_tuples is False:
        reported_date = data_date if single_day_mode else start_datetime
        failure_msg = (
            "query for complete et job failed, version={0}, date={1}".format(
                table_versions, reported_date
            )
        )
        handle_error(failure_msg, logstream)

    # Seed with the day before the start so the first returned date is
    # compared against (start - 1 day).
    day_before_start = start_datetime - timedelta(days=1)
    previous_date = day_before_start.strftime("%Y/%m/%d")

    load_candidates = []
    for job_date, load_status in status_tuples:
        if not one_day_greater(job_date, previous_date):
            break
        if load_status is None:
            load_candidates.append(job_date)
        elif load_status == 'error':
            if not retry_on_err:
                break
            load_candidates.append(job_date)
        previous_date = job_date

    logstream.write_msg(
        status='running',
        extra_msg="candidates dates for load: {0}".format(load_candidates),
    )
    return load_candidates
def setup_dates_to_check(date_with_dashes, local, logstream):
    """
    Validate the requested data date and hand it back in formatted form.

    The input is passed through get_formatted_date.  A truthy result is
    returned as-is.  Otherwise an error is written to the log stream,
    clear_env(local) is called, and an exception is raised.

    Args:
    date_with_dashes -- a date string of the form 'YYYY-MM-DD'
    local -- whether we're running locally (dev box) or not
    logstream -- a PipelineStreamLogger

    Returns:
    a string of the form YYYY/MM/DD

    Raises:
    Exception -- when the input date cannot be formatted
    """
    formatted_date = get_formatted_date(date_with_dashes)
    if not formatted_date:
        failure = "input date {0} is invalid".format(date_with_dashes)
        logstream.write_msg("error", error_msg=failure)
        # clean up before aborting
        clear_env(local)
        raise Exception(failure)
    return formatted_date
def test_get_twodaysago():
    """'twodaysago' should format to the local date two days before now."""
    two_days_back = datetime.now() - timedelta(days=2)
    wanted = two_days_back.strftime("%Y/%m/%d")
    assert get_formatted_date('twodaysago') == wanted
def test_get_yesterday():
    """'yesterday' should format to the local date one day before now."""
    one_day_back = datetime.now() - timedelta(days=1)
    wanted = one_day_back.strftime("%Y/%m/%d")
    assert get_formatted_date('yesterday') == wanted
def test_get_formatted_date(input_value, expected_value):
    """get_formatted_date(input_value) should equal expected_value.

    NOTE(review): the two arguments are presumably supplied by a
    parametrize decorator that is not visible here -- confirm.
    """
    assert get_formatted_date(input_value) == expected_value