Exemple #1
0
def create_data(input_dir, file_names, output_dir, dtype, threadid):
    """Import every named text file, then merge the resulting ``.npy`` paths.

    For each entry of ``file_names`` a progress line is printed and
    ``import_data`` is called with ``<input_dir>/<name>.txt``, ``None`` and
    ``<input_dir>/<name>.npy``. The ``.npy`` paths are collected in order
    and handed to ``merge`` together with ``output_dir``, ``dtype`` and
    ``threadid``.

    Args:
        input_dir: Directory that holds the ``.txt`` inputs; the ``.npy``
            paths are built in this same directory.
        file_names: Sized iterable of file stems (no extension).
        output_dir: Forwarded unchanged to ``merge``.
        dtype: Label shown in the progress message and forwarded to ``merge``.
        threadid: Identifier shown in the progress message and forwarded to
            ``merge``.
    """
    total = len(file_names)
    npy_paths = []
    # The progress counter is zero-based: the first file is reported as 0/total.
    for position, stem in enumerate(file_names):
        print("Importing data {}/{} for {} in thread {}...".format(position, total, dtype, threadid))
        source_txt = os.path.join(input_dir, stem + '.txt')
        target_npy = os.path.join(input_dir, stem + '.npy')
        import_data(source_txt, None, target_npy)
        npy_paths.append(target_npy)
    merge(npy_paths, output_dir, dtype, threadid)
Exemple #2
0
def create_data(input_dir, file_names, output_dir, dtype, threadid,
                class_type):
    """Import every named dataset for ``class_type``, then merge the results.

    For each dataset id in ``file_names`` a progress line is printed and
    ``import_data`` is called with ``<input_dir>/<id>.txt``, ``None``,
    ``<input_dir>/<id>.npy`` and ``class_type``. The collected ``.npy``
    paths are then passed to ``merge`` and a completion message is printed.

    Args:
        input_dir: Directory that holds the formatted ``.txt`` inputs; the
            ``.npy`` paths are built in this same directory.
        file_names: Sized iterable of dataset ids (file stems, no extension).
        output_dir: Forwarded unchanged to ``merge``.
        dtype: Label shown in the progress message and forwarded to ``merge``.
        threadid: Identifier shown in the progress message and forwarded to
            ``merge``.
        class_type: Forwarded unchanged to both ``import_data`` and ``merge``.
    """
    total = len(file_names)
    npy_paths = []
    # The progress counter is zero-based: the first dataset is reported as 0/total.
    for position, dset_id in enumerate(file_names):
        progress = "Importing data {}/{} for {} in thread {}...".format(
            position, total, dtype, threadid)
        print(progress)

        # The input is the already formatted txt file for this dataset.
        source_txt = os.path.join(input_dir, dset_id + '.txt')
        target_npy = os.path.join(input_dir, dset_id + '.npy')
        import_data(source_txt, None, target_npy, class_type)
        npy_paths.append(target_npy)

    merge(npy_paths, output_dir, dtype, threadid, class_type)
    print('"merge" is finished!')
            elif i == 1:
                resp_list.append('The person is at risk of heart stroke.')
            else:
                print(f'somethings wrong, i is {i}')

    else:
        print(
            'something is wrong in input. Contact administrator at [email protected]'
        )
    resp_df = DataFrame(resp_list, columns=['Prediction'])
    st.subheader('Prediction')
    st.dataframe(resp_df)

    set_theme(style="white")

    train_df = import_data(train=True)
    corr = train_df.corr()
    # Generate a mask for the upper triangle
    mask = np.triu(np.ones_like(corr, dtype=bool))
    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(3, 3))
    # Generate a custom diverging colormap
    cmap = diverging_palette(256, 20, as_cmap=True)

    res = heatmap(
        corr,
        vmax=1,
        square=True,
        cmap="YlGnBu",
        linewidths=0.1,
        annot=True,
                                                           , learning_rate=0.05
                                                           , eval_metric='AUC'
                                                           , random_strength=6

                                                           , cat_features=cat_features
                                                           , random_state=42,
                                                           verbose=30
                                                           ))
    ovr.fit(x_train, y_train)
    cross_validated = np.mean(cross_val_score(ovr, x_train, y_train, cv=5))
    print(f'Cross Validation Score: {cross_validated}')
    return ovr


# Script entry point: load the data, clean and split it, fit a model on the
# training split, and print the score of that model on the same training split.
if __name__ == '__main__':
    # Load the training frame and, separately, the test-set features.
    df = import_data(train=True)
    test_df = import_data(features='Datasets/test_set_features.csv', train=False)
    # Column names are captured before set_df_values/clean_data run.
    cols = list(df.columns)
    # Return value is ignored; presumably mutates df in place -- TODO confirm.
    set_df_values(df)
    df = clean_data(df)
    # Hold out 10% of the rows with a fixed seed.
    x_train, x_val, y_train, y_val, train_ids, val_ids = split_dataset(df, test_size=0.1, seed=42)
    # Features are cast to str and labels to int for both splits.
    # NOTE(review): presumably the model treats every feature as categorical -- confirm.
    x_train, y_train = x_train.astype(str), y_train.astype(int)
    # The validation split is prepared here but not used in the lines below.
    x_val, y_val = x_val.astype(str), y_val.astype(int)

    # Alternative fitting routine, currently disabled:
    # model = fit_random_search_model(x_train, y_train)
    model = fit_model(x_train, y_train)
    # Predictions are made on the training features, so the score printed
    # below is a training-set score, not a validation score.
    h1n1_preds, seasonal_preds = make_predictions(model, x_train)
    h1n1_true, seasonal_true = y_train['h1n1_vaccine'].values.tolist(), y_train['seasonal_vaccine'].values.tolist()
    # NOTE(review): the message says "Accuracy", but the metric computed by
    # get_scores is not visible here -- confirm.
    train_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds)
    print(f'Training Accuracy: {train_score}')
def run_import():
    """Call ``main()`` and then ``import_data()`` with no arguments.

    Both return values are discarded.
    """
    main()
    import_data()