Example #1
0
def test_index_date():
    """Check the index dates produced by ``calculate_index_date``.

    Calls ``calculate_index_date`` with the fixtures defined outside this
    function, then reads ``etl_index_dates.csv`` from ``deliverables_path``
    and compares each patient's index date (at day precision) against the
    hard-coded expectations below. The test fails if the number of patients
    differs, a patient is missing, or any date differs.
    """
    expected_indx_dates = {
        19: datetime.datetime(3397, 8, 4, 0, 0),
        99: datetime.datetime(2727, 3, 17, 0, 0),
        24581: datetime.datetime(3401, 6, 23, 0, 0),
        3014: datetime.datetime(2581, 5, 1, 0, 0)
    }
    # The return value is not needed here: the check is done on the CSV read
    # back below (presumably written by this call -- confirm in the ETL code).
    calculate_index_date(events_df, mortality_df, deliverables_path)

    indx_date_df = pd.read_csv(deliverables_path + 'etl_index_dates.csv')
    indx_date_df['indx_date'] = indx_date_df['indx_date'].apply(date_convert)

    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    # dict.items() works on both Python 2 and 3, unlike the Python 2-only
    # dict.iteritems(). Only the calendar date is compared, not the time.
    res = len(indx_date) == len(expected_indx_dates) and all(
        key in indx_date and indx_date[key].date() == value.date()
        for key, value in expected_indx_dates.items()
    )

    eq_(res, True, "Index dates do not match")
Example #2
0
def test_index_date():
    """Check the index dates produced by ``calculate_index_date``.

    Calls ``calculate_index_date`` with the fixtures defined outside this
    function, then reads ``etl_index_dates.csv`` from ``deliverables_path``
    and compares each patient's index date (at day precision) against the
    hard-coded expectations below. The test fails if the number of patients
    differs, a patient is missing, or any date differs.
    """
    expected_indx_dates = {
        19: datetime.datetime(3397, 8, 4, 0, 0),
        99: datetime.datetime(2727, 3, 17, 0, 0),
        24581: datetime.datetime(3401, 6, 23, 0, 0),
        3014: datetime.datetime(2581, 5, 1, 0, 0),
    }
    # The return value is not needed here: the check is done on the CSV read
    # back below (presumably written by this call -- confirm in the ETL code).
    calculate_index_date(events_df, mortality_df, deliverables_path)

    indx_date_df = pd.read_csv(deliverables_path + "etl_index_dates.csv")
    indx_date_df["indx_date"] = indx_date_df["indx_date"].apply(date_convert)

    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    # dict.items() works on both Python 2 and 3, unlike the Python 2-only
    # dict.iteritems(). Only the calendar date is compared, not the time.
    res = len(indx_date) == len(expected_indx_dates) and all(
        key in indx_date and indx_date[key].date() == value.date()
        for key, value in expected_indx_dates.items()
    )

    eq_(res, True, "Index dates do not match")
Example #3
0
def test_aggregate_events():
    """Compare the aggregated-events CSV against the expected fixture file.

    Runs ``calculate_index_date`` -> ``filter_events`` -> ``aggregate_events``
    with the fixtures defined outside this function, then reads
    ``etl_aggregated_events.csv`` from ``deliverables_path``. Header lines are
    dropped and both files are sorted so row order does not matter. Rows are
    compared field by field: the first two columns must be numerically equal
    and the third may differ by at most 0.1.
    """
    with open('tests/expected_etl_aggregated_events.csv') as expected_file:
        # Skip the header row; sort so the comparison is order-independent.
        expected_data = sorted(expected_file.readlines()[1:])

    indx_dates_df = calculate_index_date(events_df, mortality_df,
                                         deliverables_path)
    filtered_events_df = filter_events(events_df, indx_dates_df,
                                       deliverables_path)
    # The return value is not needed: the check is done on the CSV read back
    # below (presumably written by this call -- confirm in the ETL code).
    aggregate_events(filtered_events_df, mortality_df,
                     feature_map_df, deliverables_path)

    with open(deliverables_path + 'etl_aggregated_events.csv') as actual_file:
        actual_data = sorted(actual_file.readlines()[1:])

    res = True
    msg = ""
    if len(actual_data) != len(expected_data):
        # Without this check, missing rows raise an opaque IndexError and
        # extra rows in the actual output go unnoticed.
        res = False
        msg = "Expected %d rows but found %d. " % (
            len(expected_data), len(actual_data))
    else:
        for idx, (line, actual_line) in enumerate(
                zip(expected_data, actual_data)):
            first = line.split(',')
            second = actual_line.split(',')
            if not (float(first[0]) == float(second[0])
                    and float(first[1]) == float(second[1])
                    and abs(float(first[2]) - float(second[2])) <= 0.1):
                res = False
                msg = "Mistmatch on line %d. \n\nExpected: %s  \nActual: %s " % (
                    idx + 1, line, actual_line)
                break
    eq_(res, True, "Aggregated events do not match. " + msg)
Example #4
0
def test_aggregate_events():
    """Compare the aggregated-events CSV against the expected fixture file.

    Runs ``calculate_index_date`` -> ``filter_events`` -> ``aggregate_events``
    with the fixtures defined outside this function, then reads
    ``etl_aggregated_events.csv`` from ``deliverables_path``. Header lines are
    dropped and both files are sorted so row order does not matter. Rows are
    compared field by field: the first two columns must be numerically equal
    and the third may differ by at most 0.1.
    """
    with open("tests/expected_etl_aggregated_events.csv") as expected_file:
        # Skip the header row; sort so the comparison is order-independent.
        expected_data = sorted(expected_file.readlines()[1:])

    indx_dates_df = calculate_index_date(events_df, mortality_df, deliverables_path)
    filtered_events_df = filter_events(events_df, indx_dates_df, deliverables_path)
    # The return value is not needed: the check is done on the CSV read back
    # below (presumably written by this call -- confirm in the ETL code).
    aggregate_events(filtered_events_df, mortality_df, feature_map_df, deliverables_path)

    with open(deliverables_path + "etl_aggregated_events.csv") as actual_file:
        actual_data = sorted(actual_file.readlines()[1:])

    res = True
    msg = ""
    if len(actual_data) != len(expected_data):
        # Without this check, missing rows raise an opaque IndexError and
        # extra rows in the actual output go unnoticed.
        res = False
        msg = "Expected %d rows but found %d. " % (len(expected_data), len(actual_data))
    else:
        for idx, (line, actual_line) in enumerate(zip(expected_data, actual_data)):
            first = line.split(",")
            second = actual_line.split(",")
            if not (
                float(first[0]) == float(second[0])
                and float(first[1]) == float(second[1])
                and abs(float(first[2]) - float(second[2])) <= 0.1
            ):
                res = False
                msg = "Mistmatch on line %d. \n\nExpected: %s  \nActual: %s " % (idx + 1, line, actual_line)
                break
    eq_(res, True, "Aggregated events do not match. " + msg)