def test_index_date():
    """Check that calculate_index_date writes etl_index_dates.csv with the
    expected index date for each patient.

    Relies on module-level fixtures: events_df, mortality_df,
    deliverables_path, date_convert, and nose's eq_.
    """
    expected_indx_dates = {
        19: datetime.datetime(3397, 8, 4, 0, 0),
        99: datetime.datetime(2727, 3, 17, 0, 0),
        24581: datetime.datetime(3401, 6, 23, 0, 0),
        3014: datetime.datetime(2581, 5, 1, 0, 0),
    }
    calculate_index_date(events_df, mortality_df, deliverables_path)

    # Read back the CSV the ETL step produced and build patient_id -> datetime.
    indx_date_df = pd.read_csv(deliverables_path + 'etl_index_dates.csv')
    indx_date_df['indx_date'] = indx_date_df['indx_date'].apply(date_convert)
    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    res = True
    if len(indx_date) != len(expected_indx_dates):
        res = False
    else:
        # .items() replaces Python-2-only .iteritems().
        # Compare calendar dates only, ignoring time-of-day.
        for key, value in expected_indx_dates.items():
            if key not in indx_date or indx_date[key].date() != value.date():
                res = False
                break
    eq_(res, True, "Index dates do not match")
def test_index_date():
    """Check that calculate_index_date writes etl_index_dates.csv with the
    expected index date for each patient.

    Relies on module-level fixtures: events_df, mortality_df,
    deliverables_path, date_convert, and nose's eq_.
    """
    expected_indx_dates = {
        19: datetime.datetime(3397, 8, 4, 0, 0),
        99: datetime.datetime(2727, 3, 17, 0, 0),
        24581: datetime.datetime(3401, 6, 23, 0, 0),
        3014: datetime.datetime(2581, 5, 1, 0, 0),
    }
    calculate_index_date(events_df, mortality_df, deliverables_path)

    # Read back the CSV the ETL step produced and build patient_id -> datetime.
    indx_date_df = pd.read_csv(deliverables_path + "etl_index_dates.csv")
    indx_date_df["indx_date"] = indx_date_df["indx_date"].apply(date_convert)
    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    res = True
    if len(indx_date) != len(expected_indx_dates):
        res = False
    else:
        # .items() replaces Python-2-only .iteritems().
        # Compare calendar dates only, ignoring time-of-day.
        for key, value in expected_indx_dates.items():
            if key not in indx_date or indx_date[key].date() != value.date():
                res = False
                break
    eq_(res, True, "Index dates do not match")
def test_aggregate_events():
    """Check that the full ETL chain (index date -> filter -> aggregate)
    produces etl_aggregated_events.csv matching the expected fixture.

    patient_id and feature_id must match exactly; feature values may differ
    by at most 0.1. Relies on module-level fixtures: events_df, mortality_df,
    feature_map_df, deliverables_path, and nose's eq_.
    """
    with open('tests/expected_etl_aggregated_events.csv') as expected_file:
        expected_data = expected_file.readlines()
    expected_data = expected_data[1:]  # drop header row
    expected_data.sort()

    indx_dates_df = calculate_index_date(events_df, mortality_df, deliverables_path)
    filtered_events_df = filter_events(events_df, indx_dates_df, deliverables_path)
    aggregated_events_df = aggregate_events(filtered_events_df, mortality_df,
                                            feature_map_df, deliverables_path)

    with open(deliverables_path + 'etl_aggregated_events.csv') as actual_file:
        actual_data = actual_file.readlines()
    actual_data = actual_data[1:]  # drop header row
    actual_data.sort()

    res = True
    msg = ""
    # Guard against an IndexError when the actual file has fewer rows
    # than the expected fixture.
    if len(actual_data) != len(expected_data):
        res = False
        msg = "Expected %d rows, got %d." % (len(expected_data), len(actual_data))
    else:
        for idx, line in enumerate(expected_data):
            first = line.split(',')
            second = actual_data[idx].split(',')
            if not (float(first[0]) == float(second[0])
                    and float(first[1]) == float(second[1])
                    and abs(float(first[2]) - float(second[2])) <= 0.1):
                res = False
                msg = "Mismatch on line %d. \n\nExpected: %s \nActual: %s " % (
                    idx + 1, line, actual_data[idx])
                break
    eq_(res, True, "Aggregated events do not match. " + msg)
def test_aggregate_events():
    """Check that the full ETL chain (index date -> filter -> aggregate)
    produces etl_aggregated_events.csv matching the expected fixture.

    patient_id and feature_id must match exactly; feature values may differ
    by at most 0.1. Relies on module-level fixtures: events_df, mortality_df,
    feature_map_df, deliverables_path, and nose's eq_.
    """
    with open("tests/expected_etl_aggregated_events.csv") as expected_file:
        expected_data = expected_file.readlines()
    expected_data = expected_data[1:]  # drop header row
    expected_data.sort()

    indx_dates_df = calculate_index_date(events_df, mortality_df, deliverables_path)
    filtered_events_df = filter_events(events_df, indx_dates_df, deliverables_path)
    aggregated_events_df = aggregate_events(filtered_events_df, mortality_df,
                                            feature_map_df, deliverables_path)

    with open(deliverables_path + "etl_aggregated_events.csv") as actual_file:
        actual_data = actual_file.readlines()
    actual_data = actual_data[1:]  # drop header row
    actual_data.sort()

    res = True
    msg = ""
    # Guard against an IndexError when the actual file has fewer rows
    # than the expected fixture.
    if len(actual_data) != len(expected_data):
        res = False
        msg = "Expected %d rows, got %d." % (len(expected_data), len(actual_data))
    else:
        for idx, line in enumerate(expected_data):
            first = line.split(",")
            second = actual_data[idx].split(",")
            if not (
                float(first[0]) == float(second[0])
                and float(first[1]) == float(second[1])
                and abs(float(first[2]) - float(second[2])) <= 0.1
            ):
                res = False
                msg = "Mismatch on line %d. \n\nExpected: %s \nActual: %s " % (
                    idx + 1,
                    line,
                    actual_data[idx],
                )
                break
    eq_(res, True, "Aggregated events do not match. " + msg)