# Run-wide settings: the name of the column passed to utils.split_dataset as
# the goal, and two graphviz text-file names (not used in this part of the
# script; presumably consumed by later cells -- confirm).
goal_col_name = "diagnosis"
graphviz_gini = "graphviz_gini.txt"
graphviz_entropy = "graphviz_entropy.txt"

# Separate train and test CSV files, both located under q_src_dir.
test_csv_file_path = "%s/ML/KNN/data/cancer/b_cancer/cancer_data_test.csv" % q_src_dir
train_csv_file_path = "%s/ML/KNN/data/cancer/b_cancer/cancer_data_train.csv" % q_src_dir

# In[12]:

# A label is printed right before each load.
# NOTE(review): utils.import_data presumably prints the shape that these
# labels announce -- confirm in utils.
print("Train dataset shape")
train_data = utils.import_data(train_csv_file_path)

print("Test dataset shape")
test_data = utils.import_data(test_csv_file_path)

# In[4]:

# Both files go through utils.split_dataset with a third argument of 1.
# NOTE(review): the meaning of that argument is defined in utils -- confirm.
# NOTE: X and Y are bound by both unpackings, so after this cell they hold
# the values produced from the test file, not the train file.
_train_split = utils.split_dataset(train_data, goal_col_name, 1)
X, Y, X_train, temp_X_train, y_train, temp_y_train = _train_split

_test_split = utils.split_dataset(test_data, goal_col_name, 1)
X, Y, X_test, temp_X_test, y_test, temp_y_test = _test_split

# In[13]:

#print(len(X_train))
#print(len(X_test))

# In[6]:

# cross validation
# cross_validate_dt_new(X, Y)

# In[7]:
Example no. 2
0
    print("'Q_SRC_ROOT' is not set")
    exit(-1)
# Run-wide settings: the name of the column passed to utils.split_dataset as
# the goal, the value handed to it as the split ratio, and two graphviz
# text-file names (not used in this part of the script; presumably consumed
# by later cells -- confirm).
goal_col_name = "diagnosis"
split_ratio = 0.5
graphviz_gini = "graphviz_gini.txt"
graphviz_entropy = "graphviz_entropy.txt"

# Single CSV file located under q_src_dir; it is split further below.
csv_file_path = "%s/ML/KNN/data/cancer/b_cancer/cancer_data.csv" % q_src_dir

# In[14]:

# The label is printed right before the load.
# NOTE(review): utils.import_data presumably prints the shape that this
# label announces -- confirm in utils.
print("Dataset shape")
data = utils.import_data(csv_file_path)

# In[4]:

# One call yields six values, unpacked below in the order returned.
# NOTE(review): how split_ratio divides the rows is defined in utils -- confirm.
_split = utils.split_dataset(data, goal_col_name, split_ratio)
X, Y, X_train, X_test, y_train, y_test = _split

# In[5]:

# len(X_test)

# In[6]:

# print(len(X.columns))

# In[7]:

# print(len(data.columns))

# In[8]: