def test_index_date():
    """Check that ``calculate_index_date`` writes the expected index dates.

    Runs ``calculate_index_date`` on the module-level ``events_df`` and
    ``mortality_df``, reads ``etl_index_dates.csv`` back from
    ``deliverables_path`` and compares the resulting
    ``patient_id -> indx_date`` mapping with the expected one. Dates are
    compared at day granularity (``.date()``), and the mapping must contain
    exactly as many patients as expected.
    """
    expected_indx_dates = {
        19: datetime.datetime(2014, 2, 2, 0, 0),
        99: datetime.datetime(2013, 10, 13, 0, 0),
        24581: datetime.datetime(2013, 11, 7, 0, 0),
        3014: datetime.datetime(2015, 11, 17, 0, 0),
    }

    # The function under test is exercised through the file it writes,
    # not through its return value.
    calculate_index_date(events_df, mortality_df, deliverables_path)
    indx_date_df = pd.read_csv(
        deliverables_path + 'etl_index_dates.csv',
        parse_dates=['indx_date'],
    )
    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    # Same number of patients, and every expected patient is present with
    # a matching calendar date.
    res = len(indx_date) == len(expected_indx_dates) and all(
        key in indx_date and indx_date[key].date() == value.date()
        for key, value in expected_indx_dates.items()
    )

    eq_(res, True, "Index dates do not match")
def test_filtered_events():
    """Check that ``filter_events`` writes the expected filtered events.

    Computes index dates with ``calculate_index_date``, runs
    ``filter_events`` and compares the generated ``etl_filtered_events.csv``
    (under ``deliverables_path``) with
    ``tests/expected_etl_filtered_events.csv``.

    Both files have their header row dropped and their remaining lines
    sorted, so row order in the files does not matter. Rows are then
    compared pairwise: the first and third comma-separated fields
    numerically, the second field as text. The two files must also contain
    the same number of rows.
    """
    with open('tests/expected_etl_filtered_events.csv') as expected_file:
        # Skip the header line, then sort the raw text lines.
        expected_data = sorted(expected_file.readlines()[1:])

    indx_dates_df = calculate_index_date(events_df, mortality_df, deliverables_path)
    filter_events(events_df, indx_dates_df, deliverables_path)

    with open(deliverables_path + 'etl_filtered_events.csv') as actual_file:
        actual_data = sorted(actual_file.readlines()[1:])

    res = True
    msg = ""
    if len(expected_data) != len(actual_data):
        # Without this check, extra rows in the output would go unnoticed
        # and missing rows would surface as an IndexError rather than as a
        # readable test failure.
        res = False
        msg = "Expected %d rows but found %d." % (len(expected_data), len(actual_data))
    else:
        for idx, (line, actual_line) in enumerate(zip(expected_data, actual_data)):
            first = line.split(',')
            second = actual_line.split(',')
            rows_match = (
                float(first[0]) == float(second[0])
                and first[1] == second[1]
                and float(first[2]) == float(second[2])
            )
            if not rows_match:
                res = False
                msg = "Mistmatch on line %d. \n\nExpected: %s \nActual: %s " %(idx+1, line, actual_line)
                break

    eq_(res, True, "Filtered events do not match. " + msg)