# --- Configuration -------------------------------------------------------
# Target column of the dataset and the output names for the graphviz dumps.
goal_col_name = "diagnosis"
graphviz_gini = "graphviz_gini.txt"
graphviz_entropy = "graphviz_entropy.txt"

# Train and test CSVs, both located under the Q source tree (q_src_dir is
# set earlier in the script).
train_csv_file_path = (
    "%s/ML/KNN/data/cancer/b_cancer/cancer_data_train.csv" % q_src_dir
)
test_csv_file_path = (
    "%s/ML/KNN/data/cancer/b_cancer/cancer_data_test.csv" % q_src_dir
)

# In[12]:

# Load both datasets. Each load is preceded by a label line.
# NOTE(review): presumably utils.import_data prints the shape itself - confirm.
print("Train dataset shape")
train_data = utils.import_data(train_csv_file_path)

print("Test dataset shape")
test_data = utils.import_data(test_csv_file_path)

# In[4]:

# Both files are passed through split_dataset with a ratio argument of 1.
# NOTE(review): with ratio 1 the temp_* outputs are presumably throwaway
# leftovers - verify against utils.split_dataset.
(
    X,
    Y,
    X_train,
    temp_X_train,
    y_train,
    temp_y_train,
) = utils.split_dataset(train_data, goal_col_name, 1)

# X and Y are rebound here, so after this cell they come from the test file.
(
    X,
    Y,
    X_test,
    temp_X_test,
    y_test,
    temp_y_test,
) = utils.split_dataset(test_data, goal_col_name, 1)

# In[13]:

# Size checks, left disabled:
# print(len(X_train))
# print(len(X_test))

# In[6]:

# Cross validation, left disabled:
# cross_validate_dt_new(X, Y)

# In[7]:
print("'Q_SRC_ROOT' is not set") exit(-1) csv_file_path = "%s/ML/KNN/data/cancer/b_cancer/cancer_data.csv" % q_src_dir graphviz_gini = "graphviz_gini.txt" graphviz_entropy = "graphviz_entropy.txt" goal_col_name = "diagnosis" split_ratio = 0.5 # In[14]: print("Dataset shape") data = utils.import_data(csv_file_path) # In[4]: X, Y, X_train, X_test, y_train, y_test = utils.split_dataset( data, goal_col_name, split_ratio) # In[5]: # len(X_test) # In[6]: # print(len(X.columns)) # In[7]: # print(len(data.columns)) # In[8]: