Ejemplo n.º 1
0
def get_ex_normal():
    """Load the extracranial "normal" dataset and split it into three frames.

    Reads ``Extracranial/normal_ext_na_ou.csv`` through
    ``data_util.load_all`` and adds a constant ``normal`` column set to 0,
    which is returned as the label.

    Returns:
        A tuple ``(id, x_data, y_data)``: the single-column ``ID`` frame, the
        columns at positions 1 through 110 as features, and the single-column
        ``normal`` frame.
    """
    csv_path = os.path.join('Extracranial', 'normal_ext_na_ou.csv')
    normal_df = data_util.load_all(csv_path)
    normal_df['normal'] = 0

    labels = normal_df[['normal']]
    features = normal_df.iloc[:, 1:111]
    ids = normal_df[['ID']]
    return ids, features, labels
Ejemplo n.º 2
0
def get_ex_data(target, random_state=None):
    """Build a balanced, shuffled binary dataset for one extracranial target.

    Rows loaded from ``Extracranial/<target>_ext_na_ou.csv`` are labelled 1
    and rows loaded from ``Extracranial/normal_ext_na_ou.csv`` are labelled 0,
    both in a column named after ``target``. The normal rows are downsampled
    without replacement to the number of target rows, then the two sets are
    concatenated and shuffled.

    Args:
        target: Name used both as the CSV file prefix and as the label column.
        random_state: Optional seed forwarded to the downsampling step and to
            the shuffle so that a run can be reproduced. The default ``None``
            keeps the previous unseeded behaviour.

    Returns:
        A tuple ``(id, x_data, y_data)``: the single-column ``ID`` frame, every
        column except ``ID`` and the label as features, and the single-column
        label frame. All three share the shuffled row order.
    """
    base_dir = 'Extracranial' + os.sep

    target_df = data_util.load_all(base_dir + target + '_ext_na_ou.csv')
    target_df[target] = 1
    normal_df = data_util.load_all(base_dir + 'normal_ext_na_ou.csv')
    normal_df[target] = 0

    # Downsample the normal rows to the size of the target set.
    # NOTE(review): sampling without replacement needs at least as many normal
    # rows as target rows; otherwise resample is expected to raise - confirm
    # the normal set is always the larger one.
    normal_downsampled = resample(
        normal_df,
        replace=False,
        n_samples=target_df.shape[0],
        random_state=random_state)

    combined = pd.concat([target_df, normal_downsampled], axis=0)
    # frac=1 returns every row in random order, i.e. a shuffle.
    shuffled = combined.sample(frac=1, random_state=random_state)

    ids = shuffled[['ID']]
    features = shuffled.drop(['ID', target], axis=1)
    labels = shuffled[[target]]
    return ids, features, labels
Ejemplo n.º 3
0
def get_ex_in_all(random_state=None):
    """Build a balanced, shuffled extracranial + intracranial dataset.

    Loads ``Extracranial+Intracranial/ALL_int_ext_na_ou.csv`` and splits the
    rows into a "normal" group, where every one of the 17 listed columns
    equals 0, and the remaining rows. The normal group is downsampled without
    replacement to the size of the remaining group, then both are
    concatenated and shuffled.

    Args:
        random_state: Optional seed forwarded to the downsampling step and to
            the shuffle so that a run can be reproduced. The default ``None``
            keeps the previous unseeded behaviour.

    Returns:
        A tuple ``(id, x_data, y_data)``: the single-column ``ID`` frame, the
        columns at positions 1 through 165 as features, and the columns from
        position 166 onward as labels. All three share the shuffled row order.
    """
    # Columns that must all be 0 for a row to count as normal.
    # NOTE(review): these look like per-vessel label columns - confirm.
    zero_columns = [
        'RCCA', 'REICA', 'RIICA', 'RACA', 'RMCA', 'RPCA', 'REVA', 'RIVA',
        'BA',
        'LCCA', 'LEICA', 'LIICA', 'LACA', 'LMCA', 'LPCA', 'LEVA', 'LIVA',
    ]

    df = data_util.load_all('Extracranial+Intracranial' + os.sep +
                            'ALL_int_ext_na_ou.csv')

    is_normal = (df[zero_columns] == 0).all(axis=1)
    normal_df = df[is_normal]
    # Complement by mask rather than by index label, so a duplicated index
    # label can never remove a non-normal row.
    other_df = df[~is_normal]

    # NOTE(review): sampling without replacement needs at least as many normal
    # rows as non-normal rows; otherwise resample is expected to raise.
    normal_downsampled = resample(
        normal_df,
        replace=False,
        n_samples=other_df.shape[0],
        random_state=random_state)

    combined = pd.concat([other_df, normal_downsampled], axis=0)
    # frac=1 returns every row in random order, i.e. a shuffle.
    shuffled = combined.sample(frac=1, random_state=random_state)

    ids = shuffled[['ID']]
    features = shuffled.iloc[:, 1:166]
    labels = shuffled.iloc[:, 166:]
    return ids, features, labels
Ejemplo n.º 4
0
def outliers_iqr(ys):
    """Flag values that fall outside the 1.5 * IQR fences.

    Method from http://colingorrie.github.io/outlier-detection.html

    ``ys`` is first coerced to numeric; entries that cannot be converted
    become NaN and are never flagged. The result is a boolean mask that is
    True where a value is above ``Q3 + 1.5 * IQR`` or below
    ``Q1 - 1.5 * IQR``.
    """
    numeric = ys.apply(pd.to_numeric, errors='coerce')
    q1, q3 = numeric.quantile(0.25), numeric.quantile(0.75)
    fence = (q3 - q1) * 1.5
    too_low = numeric < q1 - fence
    too_high = numeric > q3 + fence
    return too_high | too_low


if __name__ == '__main__':
    # Script entry: load carotid_new.csv, blank out IQR outliers separately
    # for the Stenosis_code == 0 and == 1 groups, then mean-impute each group.
    df = data_util.load_all('carotid_new.csv')
    id = df[['ID']]
    x_data = df.iloc[:, 1:126]
    y_data = df.iloc[:, 126:]

    # Take explicit copies: the frames are modified in place below, and
    # assigning into a boolean-indexed slice raises SettingWithCopyWarning.
    # NOTE(review): rows whose Stenosis_code is neither 0 nor 1 end up in
    # neither group and are therefore absent from x_data_f.
    x_data_n = x_data[y_data['Stenosis_code'] == 0].copy()
    x_data_s = x_data[y_data['Stenosis_code'] == 1].copy()
    for col in x_data:
        # Outliers are detected per column and per group, then set to NaN so
        # the imputer below can fill them.
        x_data_n.loc[outliers_iqr(x_data_n[col]), col] = np.nan
        x_data_s.loc[outliers_iqr(x_data_s[col]), col] = np.nan
    # NOTE(review): sklearn.preprocessing.Imputer was removed in scikit-learn
    # 0.22; sklearn.impute.SimpleImputer (no ``axis`` argument) replaces it.
    x_data_n2 = Imputer(missing_values=np.nan, strategy='mean', axis=0).fit_transform(x_data_n)
    x_data_s2 = Imputer(missing_values=np.nan, strategy='mean', axis=0).fit_transform(x_data_s)

    # fit_transform returns plain arrays; restore the original labels.
    x_data_n = pd.DataFrame(x_data_n2, index=x_data_n.index, columns=x_data.columns)
    x_data_s = pd.DataFrame(x_data_s2, index=x_data_s.index, columns=x_data.columns)
    # Row order is now group 0 followed by group 1; the original index labels
    # are preserved, so align with y_data by label rather than by position.
    x_data_f = pd.concat([x_data_n, x_data_s], axis=0)