Example #1
0
def dates_from_rs_status(status_helper, db, logstream,
                         retry_on_err, single_date=None):
    """
    dates_from_rs_status collects the dates whose et step has completed and
    whose load step may be attempted.

    The status rows handed back by status_helper are walked in order.  The
    walk ends at the first row whose date is not one day after the previous
    one, or at a row whose load status is 'error' when retry_on_err is
    false.  Rows with no load status, or with an 'error' status while
    retrying, contribute their date to the result.

    Args:
    status_helper -- a wrapper around a backing store to aid in CRUD
    db -- is the database we query
    logstream -- a PipelineStreamLogger
    retry_on_err -- a boolean, True if we're retrying on errors
    single_date -- date string of the form YYYY-MM-DD if we're \
        only looking for one

    Returns:
    a list of dates to catch up on formatted as strings YYYY/MM/DD
    """
    versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())

    if single_date is None:
        # Look back over the configured number of days, plus one.
        lookback_days = read_int('pipeline.load_step.days_to_check') + 1
        window_start = datetime.utcnow() - timedelta(days=lookback_days)
        rows = status_helper.query_et_complete_jobs(
            db, versions, window_start)
    else:
        data_date = get_formatted_date(single_date)
        if data_date is None:
            # NOTE(review): handle_error presumably raises; if it returned,
            # the strptime call below would fail on None -- confirm.
            handle_error("bad input date: {0}".format(single_date), logstream)
        window_start = datetime.strptime(data_date, "%Y/%m/%d")
        rows = status_helper.query_et_complete_job(db, versions, data_date)

    # The status helper signals a failed query with the literal False.
    if rows is False:
        queried_for = data_date if single_date is not None else window_start
        failure_msg = (
            "query for complete et job failed, version={0}, date={1}".format(
                versions, queried_for))
        handle_error(failure_msg, logstream)

    candidates = []
    # Seed with the day before the window so the first row has to fall on
    # the window start itself to pass the consecutive-day check.
    previous_date = (window_start - timedelta(days=1)).strftime("%Y/%m/%d")
    for ddate, ld_status in rows:
        if not one_day_greater(ddate, previous_date):
            break
        if ld_status is None or (ld_status == 'error' and retry_on_err):
            candidates.append(ddate)
        elif ld_status == 'error':
            break
        previous_date = ddate

    logstream.write_msg(
        status='running',
        extra_msg="candidates dates for load: {0}".format(candidates))
    return candidates
Example #2
0
def dates_from_rs_status(status_helper,
                         db,
                         logstream,
                         retry_on_err,
                         single_date=None):
    """
    dates_from_rs_status returns the dates that have finished the et step
    and are eligible for the load step.

    Status rows are consumed in the order the status helper supplies them.
    Iteration stops as soon as a date does not directly follow the previous
    one, or when a load status of 'error' is met and retry_on_err is false.
    A date is kept when it has no load status yet, or when its load status
    is 'error' and retry_on_err is true.

    Args:
    status_helper -- a wrapper around a backing store to aid in CRUD
    db -- is the database we query
    logstream -- a PipelineStreamLogger
    retry_on_err -- a boolean, True if we're retrying on errors
    single_date -- date string of the form YYYY-MM-DD if we're \
        only looking for one

    Returns:
    a list of dates to catch up on formatted as strings YYYY/MM/DD
    """
    versions = get_yaml_table_versions(pipeline_yaml_schema_file_path())
    only_one_date = single_date is not None

    if only_one_date:
        data_date = get_formatted_date(single_date)
        if data_date is None:
            # NOTE(review): this relies on handle_error not returning
            # (presumably it raises) -- confirm against its definition.
            handle_error("bad input date: {0}".format(single_date), logstream)
        first_day = datetime.strptime(data_date, "%Y/%m/%d")
        et_rows = status_helper.query_et_complete_job(db, versions, data_date)
    else:
        # One more day than the configured look-back is queried.
        span = read_int('pipeline.load_step.days_to_check') + 1
        first_day = datetime.utcnow() - timedelta(days=span)
        et_rows = status_helper.query_et_complete_jobs(
            db, versions, first_day)

    # A literal False from the status helper marks a failed query.
    if et_rows is False:
        handle_error(
            "query for complete et job failed, version={0}, date={1}".format(
                versions, data_date if only_one_date else first_day),
            logstream)

    candidates = []
    # Start from the day before first_day so that the first row must be
    # first_day itself for the consecutive-day check to pass.
    prior = (first_day - timedelta(days=1)).strftime("%Y/%m/%d")
    for ddate, ld_status in et_rows:
        if not one_day_greater(ddate, prior):
            break
        load_failed = ld_status == 'error'
        if load_failed and not retry_on_err:
            # A failed load that we are not retrying blocks later dates.
            break
        if ld_status is None or load_failed:
            candidates.append(ddate)
        prior = ddate

    summary = "candidates dates for load: {0}".format(candidates)
    logstream.write_msg(status='running', extra_msg=summary)
    return candidates
Example #3
0
def setup_dates_to_check(date_with_dashes, local, logstream):
    """
    setup_dates_to_check validates one input date and returns it in its
    formatted form.

    The input is passed through get_formatted_date.  A truthy result is
    returned as is.  Otherwise an error is written to the logstream,
    clear_env(local) is called, and an Exception with the same message is
    raised.

    Args:
    date_with_dashes -- a date string of the form 'YYYY-MM-DD'
    local -- whether we're running on locally (dev box) or not
    logstream -- a PipelineStreamLogger

    Returns:
    a string of the form YYYY/MM/DD

    Raises:
    Exception -- when get_formatted_date returns a falsy value
    """
    formatted = get_formatted_date(date_with_dashes)
    if not formatted:
        message = "input date {0} is invalid".format(date_with_dashes)
        logstream.write_msg("error", error_msg=message)
        clear_env(local)
        raise Exception(message)
    return formatted
Example #4
0
def setup_dates_to_check(date_with_dashes, local, logstream):
    """
    setup_dates_to_check checks the given date and returns its formatted
    form, or reports the bad date and raises.

    On a falsy result from get_formatted_date the function logs an error to
    the logstream, calls clear_env(local) and then raises an Exception whose
    message names the offending input.

    Args:
    date_with_dashes -- a date string of the form 'YYYY-MM-DD'
    local -- whether we're running on locally (dev box) or not
    logstream -- a PipelineStreamLogger

    Returns:
    a string of the form YYYY/MM/DD

    Raises:
    Exception -- when the input date is rejected by get_formatted_date
    """
    checked_date = get_formatted_date(date_with_dashes)
    if checked_date:
        return checked_date
    else:
        problem = "input date {0} is invalid".format(date_with_dashes)
        logstream.write_msg("error", error_msg=problem)
        clear_env(local)
        raise Exception(problem)
Example #5
0
def test_get_twodaysago():
    """'twodaysago' must format to the date two days before now."""
    two_days_back = datetime.now() - timedelta(days=2)
    wanted = two_days_back.strftime("%Y/%m/%d")
    assert get_formatted_date('twodaysago') == wanted
Example #6
0
def test_get_yesterday():
    """'yesterday' must format to the date one day before now."""
    one_day_back = datetime.now() - timedelta(days=1)
    wanted = one_day_back.strftime("%Y/%m/%d")
    assert get_formatted_date('yesterday') == wanted
Example #7
0
def test_get_formatted_date(input_value, expected_value):
    """get_formatted_date(input_value) must equal expected_value."""
    # NOTE(review): the two arguments are presumably supplied by a
    # parametrize decorator or fixtures not visible here -- confirm.
    assert get_formatted_date(input_value) == expected_value
Example #8
0
def test_get_twodaysago():
    """Check that 'twodaysago' resolves to now minus two days, YYYY/MM/DD."""
    reference = datetime.now()
    expected = (reference - timedelta(days=2)).strftime("%Y/%m/%d")
    actual = get_formatted_date('twodaysago')
    assert actual == expected
Example #9
0
def test_get_yesterday():
    """Check that 'yesterday' resolves to now minus one day, YYYY/MM/DD."""
    reference = datetime.now()
    expected = (reference - timedelta(days=1)).strftime("%Y/%m/%d")
    actual = get_formatted_date('yesterday')
    assert actual == expected
Example #10
0
def test_get_formatted_date(input_value, expected_value):
    """Compare get_formatted_date's result for input_value to the expectation."""
    # NOTE(review): input_value / expected_value presumably come from a
    # parametrization that is not part of this excerpt -- confirm.
    actual = get_formatted_date(input_value)
    assert expected_value == actual