def test_index_date():
    """Check the index dates produced by ``calculate_index_date``.

    Runs ``calculate_index_date`` on the module-level ``events_df`` and
    ``mortality_df``, then reads ``etl_index_dates.csv`` back from
    ``deliverables_path`` and compares the calendar date recorded for each
    patient against the hard-coded expectations below.  The test passes only
    when both mappings have the same number of patients and every expected
    patient id is present with a matching date.

    NOTE(review): this file defines ``test_index_date`` a second time further
    down; the later definition replaces this one when the module is loaded.
    """
    expected_indx_dates = {
        19: datetime.datetime(3397, 8, 4, 0, 0),
        99: datetime.datetime(2727, 3, 17, 0, 0),
        24581: datetime.datetime(3401, 6, 23, 0, 0),
        3014: datetime.datetime(2581, 5, 1, 0, 0),
    }
    # Called for its output file (read back below); the return value is
    # not used here.
    calculate_index_date(events_df, mortality_df, deliverables_path)

    indx_date_df = pd.read_csv(deliverables_path + 'etl_index_dates.csv')
    indx_date_df['indx_date'] = indx_date_df['indx_date'].apply(date_convert)

    # patient_id -> converted index date, as read back from the CSV.
    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    # ``.items()`` (rather than ``.iteritems()``) works on Python 2 and 3.
    # Only the calendar date is compared, not the time of day.
    res = len(indx_date) == len(expected_indx_dates) and all(
        key in indx_date and indx_date[key].date() == value.date()
        for key, value in expected_indx_dates.items()
    )

    eq_(res, True, "Index dates do not match")
def test_index_date():
    """Check the index dates produced by ``calculate_index_date``.

    Runs ``calculate_index_date`` on the module-level ``events_df`` and
    ``mortality_df``, then reads ``etl_index_dates.csv`` back from
    ``deliverables_path`` and compares the calendar date recorded for each
    patient against the hard-coded expectations below.  The test passes only
    when both mappings have the same number of patients and every expected
    patient id is present with a matching date.

    NOTE(review): an earlier definition of ``test_index_date`` exists in this
    file; this later definition shadows it when the module is loaded.
    """
    expected_indx_dates = {
        19: datetime.datetime(3397, 8, 4, 0, 0),
        99: datetime.datetime(2727, 3, 17, 0, 0),
        24581: datetime.datetime(3401, 6, 23, 0, 0),
        3014: datetime.datetime(2581, 5, 1, 0, 0),
    }
    # Called for its output file (read back below); the return value is
    # not used here.
    calculate_index_date(events_df, mortality_df, deliverables_path)

    indx_date_df = pd.read_csv(deliverables_path + 'etl_index_dates.csv')
    indx_date_df['indx_date'] = indx_date_df['indx_date'].apply(date_convert)

    # patient_id -> converted index date, as read back from the CSV.
    indx_date = dict(zip(indx_date_df.patient_id, indx_date_df.indx_date))

    # ``.items()`` (rather than ``.iteritems()``) works on Python 2 and 3.
    # Only the calendar date is compared, not the time of day.
    res = len(indx_date) == len(expected_indx_dates) and all(
        key in indx_date and indx_date[key].date() == value.date()
        for key, value in expected_indx_dates.items()
    )

    eq_(res, True, "Index dates do not match")
def test_filtered_events():
    """Check ``etl_filtered_events.csv`` against the expected fixture.

    Feeds the result of ``calculate_index_date`` into ``filter_events`` and
    then compares ``etl_filtered_events.csv`` under ``deliverables_path``
    with ``tests/expected_etl_filtered_events.csv``.  Both files have their
    first (header) line dropped and the remaining lines sorted, so the row
    order inside the files does not matter.  Rows are compared field by
    field: columns 0 and 2 numerically, column 1 as text.

    A differing number of rows is reported as a failure up front, rather than
    surfacing as an ``IndexError`` (too few rows) or going unnoticed (extra
    rows).
    """
    with open('tests/expected_etl_filtered_events.csv') as expected_file:
        # Drop the header line, then sort for an order-independent comparison.
        expected_data = sorted(expected_file.readlines()[1:])

    indx_dates_df = calculate_index_date(events_df, mortality_df, deliverables_path)
    filter_events(events_df, indx_dates_df, deliverables_path)

    with open(deliverables_path + 'etl_filtered_events.csv') as actual_file:
        actual_data = sorted(actual_file.readlines()[1:])

    res = True
    msg = ""
    if len(actual_data) != len(expected_data):
        res = False
        msg = "Expected %d rows but found %d." % (len(expected_data), len(actual_data))
    else:
        for idx, (line, actual_line) in enumerate(zip(expected_data, actual_data)):
            first = line.split(',')
            second = actual_line.split(',')
            same_row = (float(first[0]) == float(second[0])
                        and first[1] == second[1]
                        and float(first[2]) == float(second[2]))
            if not same_row:
                res = False
                msg = "Mistmatch on line %d. \n\nExpected: %s  \nActual: %s " % (idx + 1, line, actual_line)
                # Report only the first differing row.
                break

    eq_(res, True, "Filtered events do not match. " + msg)
def test_aggregate_events():
    """Check ``etl_aggregated_events.csv`` against the expected fixture.

    Chains ``calculate_index_date`` -> ``filter_events`` ->
    ``aggregate_events`` and then compares ``etl_aggregated_events.csv``
    under ``deliverables_path`` with
    ``tests/expected_etl_aggregated_events.csv``.  Both files have their
    first (header) line dropped and the remaining lines sorted, so the row
    order inside the files does not matter.  Columns 0 and 1 must be
    numerically equal; column 2 may differ by at most 0.1.

    A differing number of rows is reported as a failure up front, rather than
    surfacing as an ``IndexError`` (too few rows) or going unnoticed (extra
    rows).
    """
    with open('tests/expected_etl_aggregated_events.csv') as expected_file:
        # Drop the header line, then sort for an order-independent comparison.
        expected_data = sorted(expected_file.readlines()[1:])

    indx_dates_df = calculate_index_date(events_df, mortality_df,
                                         deliverables_path)
    filtered_events_df = filter_events(events_df, indx_dates_df,
                                       deliverables_path)
    # Called for its output file (read back below); the returned value is
    # not needed by this test.
    aggregate_events(filtered_events_df, mortality_df,
                     feature_map_df, deliverables_path)

    with open(deliverables_path + 'etl_aggregated_events.csv') as actual_file:
        actual_data = sorted(actual_file.readlines()[1:])

    res = True
    msg = ""
    if len(actual_data) != len(expected_data):
        res = False
        msg = "Expected %d rows but found %d." % (len(expected_data),
                                                  len(actual_data))
    else:
        for idx, (line, actual_line) in enumerate(zip(expected_data, actual_data)):
            first = line.split(',')
            second = actual_line.split(',')
            same_row = (float(first[0]) == float(second[0])
                        and float(first[1]) == float(second[1])
                        # Third column is compared with an absolute tolerance.
                        and abs(float(first[2]) - float(second[2])) <= 0.1)
            if not same_row:
                res = False
                msg = "Mistmatch on line %d. \n\nExpected: %s  \nActual: %s " % (
                    idx + 1, line, actual_line)
                # Report only the first differing row.
                break
    eq_(res, True, "Aggregated events do not match. " + msg)