def create_data(input_dir, file_names, output_dir, dtype, threadid):
    """Convert each '<name>.txt' in *input_dir* to '<name>.npy', then merge.

    For every name in *file_names*, reads ``<input_dir>/<name>.txt`` via
    ``import_data`` and writes ``<input_dir>/<name>.npy`` (note: the .npy is
    written next to the input, not into *output_dir*). The resulting .npy
    paths are then handed to ``merge``.

    Args:
        input_dir: Directory containing the '.txt' sources (and receiving
            the intermediate '.npy' files).
        file_names: Iterable of base file names, without extension.
        output_dir: Destination directory passed through to ``merge``.
        dtype: Dataset label, used only for progress logging and ``merge``.
        threadid: Worker identifier, used only for progress logging and
            ``merge``.
    """
    npy_paths = []
    total = len(file_names)
    # enumerate(start=1) fixes the original zero-based progress counter,
    # which printed "0/total" for the first file and never reached "total/total".
    for idx, fname in enumerate(file_names, start=1):
        print("Importing data {}/{} for {} in thread {}...".format(
            idx, total, dtype, threadid))
        txt_path = os.path.join(input_dir, fname + '.txt')
        npy_path = os.path.join(input_dir, fname + '.npy')
        import_data(txt_path, None, npy_path)
        npy_paths.append(npy_path)
    merge(npy_paths, output_dir, dtype, threadid)
def create_data(input_dir, file_names, output_dir, dtype, threadid, class_type):
    """Convert each '<name>.txt' in *input_dir* to '<name>.npy', then merge.

    Class-aware variant: *class_type* is forwarded to both ``import_data``
    and ``merge``. For every name in *file_names*, reads
    ``<input_dir>/<name>.txt`` (the formatted txt file) and writes
    ``<input_dir>/<name>.npy`` — note the intermediate .npy lands next to
    the input, not in *output_dir*.

    Args:
        input_dir: Directory containing the formatted '.txt' sources (and
            receiving the intermediate '.npy' files).
        file_names: Iterable of base file names, without extension.
        output_dir: Destination directory passed through to ``merge``.
        dtype: Dataset label, used for progress logging and ``merge``.
        threadid: Worker identifier, used for progress logging and ``merge``.
        class_type: Class/label mode forwarded to ``import_data`` and
            ``merge`` (semantics defined by those helpers).
    """
    npy_paths = []
    total = len(file_names)
    # enumerate(start=1) fixes the original zero-based progress counter,
    # which printed "0/total" for the first file and never reached "total/total".
    for idx, fname in enumerate(file_names, start=1):
        print("Importing data {}/{} for {} in thread {}...".format(
            idx, total, dtype, threadid))
        txt_path = os.path.join(input_dir, fname + '.txt')
        npy_path = os.path.join(input_dir, fname + '.npy')
        import_data(txt_path, None, npy_path, class_type)
        npy_paths.append(npy_path)
    merge(npy_paths, output_dir, dtype, threadid, class_type)
    print('"merge" is finished!')
elif i == 1: resp_list.append('The person is at risk of heart stroke.') else: print(f'somethings wrong, i is {i}') else: print( 'something is wrong in input. Contact administrator at [email protected]' ) resp_df = DataFrame(resp_list, columns=['Prediction']) st.subheader('Prediction') st.dataframe(resp_df) set_theme(style="white") train_df = import_data(train=True) corr = train_df.corr() # Generate a mask for the upper triangle mask = np.triu(np.ones_like(corr, dtype=bool)) # Set up the matplotlib figure f, ax = plt.subplots(figsize=(3, 3)) # Generate a custom diverging colormap cmap = diverging_palette(256, 20, as_cmap=True) res = heatmap( corr, vmax=1, square=True, cmap="YlGnBu", linewidths=0.1, annot=True,
, learning_rate=0.05 , eval_metric='AUC' , random_strength=6 , cat_features=cat_features , random_state=42, verbose=30 )) ovr.fit(x_train, y_train) cross_validated = np.mean(cross_val_score(ovr, x_train, y_train, cv=5)) print(f'Cross Validation Score: {cross_validated}') return ovr if __name__ == '__main__': df = import_data(train=True) test_df = import_data(features='Datasets/test_set_features.csv', train=False) cols = list(df.columns) set_df_values(df) df = clean_data(df) x_train, x_val, y_train, y_val, train_ids, val_ids = split_dataset(df, test_size=0.1, seed=42) x_train, y_train = x_train.astype(str), y_train.astype(int) x_val, y_val = x_val.astype(str), y_val.astype(int) # model = fit_random_search_model(x_train, y_train) model = fit_model(x_train, y_train) h1n1_preds, seasonal_preds = make_predictions(model, x_train) h1n1_true, seasonal_true = y_train['h1n1_vaccine'].values.tolist(), y_train['seasonal_vaccine'].values.tolist() train_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds) print(f'Training Accuracy: {train_score}')
def run_import():
    """Entry-point wrapper: run main() and then import_data().

    NOTE(review): reconstructed from a whitespace-collapsed source. It is
    ambiguous whether ``import_data()`` belongs inside this function or was
    a top-level call following it — confirm against version control. Neither
    ``main`` nor ``import_data`` is defined in this chunk; both are called
    with no arguments here, so ``import_data`` presumably has defaults —
    verify its signature.
    """
    main()
    import_data()