def test_learning(method='drop', train_size=270):
    """Fit a StackClassification on the first rows and print predictions for the rest.

    Records come from ``get_cleaned_data_from_DB(method)``.  The first
    ``train_size`` records form the training set, every later record is held
    out.  Categorical columns are one-hot encoded, the ``id`` column is
    dropped and any positive ``target`` is collapsed to ``1``.

    The held-out rows are scaled with the column norms computed on the
    training rows, so a given raw value maps to the same scaled value in both
    sets (scaling each set by its own maxima would not guarantee that).

    Args:
        method: Forwarded unchanged to ``get_cleaned_data_from_DB``.
        train_size: Number of leading records used for training.

    Prints the training shapes, the predicted labels for the held-out rows
    and, on one line, the true held-out labels separated by spaces.
    """

    def _encode_feature(name, value):
        # Categorical columns expand to a one-hot list; anything else is
        # passed through as a single-element list.
        if name == 'thal':
            width, hot = 3, (0 if value == 3 else 1 if value == 6 else 2)
        elif name == 'pain_type':
            width, hot = 4, int(value) - 1  # stored codes are offset by one
        elif name == 'electrocardiographic':
            width, hot = 4, int(value)
        else:
            return [value]
        encoded = [0.0] * width
        encoded[hot] = 1.0
        return encoded

    train_rows, train_labels = [], []
    test_rows, test_labels = [], []
    for row_number, record in enumerate(get_cleaned_data_from_DB(method)):
        in_training_set = row_number < train_size
        features = []
        for name in record:
            if name == 'id':
                continue
            value = record[name]
            if name == 'target':
                label = 1 if value > 0 else value
                (train_labels if in_training_set else test_labels).append(label)
            else:
                features.extend(_encode_feature(name, value))
        (train_rows if in_training_set else test_rows).append(features)

    # NOTE(review): assumes `normalize` is sklearn.preprocessing.normalize,
    # whose `return_norm=True` also returns the per-column norms used.
    X, column_norms = normalize(
        np.array(train_rows), axis=0, norm='max', return_norm=True)
    y = np.array(train_labels)

    clf = StackClassification()
    print(X.shape, y.shape)
    clf.fit(X, y)

    if not test_rows:
        # Nothing beyond the split point: there is no held-out set to score.
        print("no held-out rows to predict")
        return

    # Re-use the training norms so both sets share one scale.
    test = np.asarray(test_rows, dtype=float) / column_norms
    print(clf.predict(test))
    for label in test_labels:
        print(label, end=' ')
def learningCurve(method='drop', model_type='stack'):
    """Return mean train/validation accuracy for increasing training-set sizes.

    Records are read from ``dbManipulation.get_cleaned_data_from_DB(method)``,
    one-hot encoded (``thal``, ``pain_type``, ``electrocardiographic``),
    stripped of ``id`` and column-normalised with ``norm='max'``.  Positive
    ``target`` values are collapsed to ``1``.

    Args:
        method: Forwarded unchanged to ``get_cleaned_data_from_DB``.
        model_type: ``'stack'`` selects ``StackClassification``; any other
            value selects ``LogisticRegression(C=0.01, penalty='l2')``.

    Returns:
        A 3-tuple: the training sizes reported by ``learning_curve``, the
        per-size mean of the training scores, and the per-size mean of the
        validation scores (means taken across the 10 CV folds).
    """

    def _encode_feature(name, value):
        # Categorical columns expand to a one-hot list; anything else is
        # passed through as a single-element list.
        if name == 'thal':
            width, hot = 3, (0 if value == 3 else 1 if value == 6 else 2)
        elif name == 'pain_type':
            width, hot = 4, int(value) - 1
        elif name == 'electrocardiographic':
            width, hot = 4, int(value)
        else:
            return [value]
        encoded = [0.0] * width
        encoded[hot] = 1.0
        return encoded

    feature_rows, labels = [], []
    for record in dbManipulation.get_cleaned_data_from_DB(method):
        encoded_row = []
        for name in record:
            if name == 'id':
                continue
            value = record[name]
            if name == 'target':
                labels.append(1 if value > 0 else value)
            else:
                encoded_row.extend(_encode_feature(name, value))
        feature_rows.append(encoded_row)

    X = normalize(np.array(feature_rows), axis=0, norm='max')
    y = np.array(labels)

    print("Start training...")
    clf = (StackClassification() if model_type == 'stack'
           else LogisticRegression(C=0.01, penalty='l2'))

    # 20, 40, ..., 260 samples.
    requested_sizes = list(range(20, 261, 20))
    sizes_used, train_scores, valid_scores = learning_curve(
        clf, X, y, train_sizes=requested_sizes, cv=10, scoring='accuracy')
    print("finish training")

    mean_train = np.mean(train_scores, axis=1)
    mean_valid = np.mean(valid_scores, axis=1)
    return sizes_used, mean_train, mean_valid
def clustering(cluster_method='kmeans'):
    """Cluster the cleaned records into two groups on a 2-D PCA-derived embedding.

    Records come from ``get_cleaned_data_from_DB(method='drop')``; they are
    one-hot encoded, stripped of ``id`` and column-normalised with
    ``norm='max'``.  Positive ``target`` values are collapsed to ``1``.

    Args:
        cluster_method: ``'spectral'`` selects ``SpectralClustering``; any
            other value selects ``KMeans``.  Both use two clusters and
            ``random_state=0``.

    Returns:
        A 3-tuple ``(embedding, cluster_labels, y)`` where ``embedding`` is
        the two-column array that was clustered and ``y`` holds the
        binarised targets.
    """

    def _encode_feature(name, value):
        # Categorical columns expand to a one-hot list; anything else is
        # passed through as a single-element list.
        if name == 'thal':
            width, hot = 3, (0 if value == 3 else 1 if value == 6 else 2)
        elif name == 'pain_type':
            width, hot = 4, int(value) - 1
        elif name == 'electrocardiographic':
            width, hot = 4, int(value)
        else:
            return [value]
        encoded = [0.0] * width
        encoded[hot] = 1.0
        return encoded

    feature_rows, labels = [], []
    for record in get_cleaned_data_from_DB(method='drop'):
        encoded_row = []
        for name in record:
            if name == 'id':
                continue
            value = record[name]
            if name == 'target':
                labels.append(1 if value > 0 else value)
            else:
                encoded_row.extend(_encode_feature(name, value))
        feature_rows.append(encoded_row)

    scaled = normalize(np.array(feature_rows), axis=0, norm='max')
    y = np.array(labels)

    # NOTE(review): PCA is fitted on the transposed matrix and the fitted
    # components (one column per original record after the transpose) are
    # used as the embedding, rather than calling transform on the records —
    # confirm this is the intended projection.
    pca = PCA(n_components=2)
    pca.fit(scaled.T)
    embedding = pca.components_.T

    if cluster_method == 'spectral':
        estimator = SpectralClustering(
            n_clusters=2, assign_labels="discretize", random_state=0)
    else:
        estimator = KMeans(n_clusters=2, random_state=0)
    fitted = estimator.fit(embedding)

    return embedding, fitted.labels_, y
def training_model(method='drop', model_type='stack'):
    """Fit and return a classifier trained on every cleaned record.

    Records are read from ``dbManipulation.get_cleaned_data_from_DB(method)``,
    one-hot encoded (``thal``, ``pain_type``, ``electrocardiographic``),
    stripped of ``id`` and column-normalised with ``norm='max'``.  Positive
    ``target`` values are collapsed to ``1``.

    Args:
        method: Forwarded unchanged to ``get_cleaned_data_from_DB``.
        model_type: ``'stack'`` selects ``StackClassification``; any other
            value selects ``LogisticRegression(C=0.01, penalty='l2')``.

    Returns:
        The estimator after ``fit(X, y)`` has been called on it.
    """

    def _encode_feature(name, value):
        # Categorical columns expand to a one-hot list; anything else is
        # passed through as a single-element list.
        if name == 'thal':
            width, hot = 3, (0 if value == 3 else 1 if value == 6 else 2)
        elif name == 'pain_type':
            width, hot = 4, int(value) - 1
        elif name == 'electrocardiographic':
            width, hot = 4, int(value)
        else:
            return [value]
        encoded = [0.0] * width
        encoded[hot] = 1.0
        return encoded

    feature_rows, labels = [], []
    for record in dbManipulation.get_cleaned_data_from_DB(method):
        encoded_row = []
        for name in record:
            if name == 'id':
                continue
            value = record[name]
            if name == 'target':
                labels.append(1 if value > 0 else value)
            else:
                encoded_row.extend(_encode_feature(name, value))
        feature_rows.append(encoded_row)

    X = normalize(np.array(feature_rows), axis=0, norm='max')
    y = np.array(labels)

    clf = (StackClassification() if model_type == 'stack'
           else LogisticRegression(C=0.01, penalty='l2'))
    clf.fit(X, y)
    return clf
def cleanInput(inputX):
    """Impute, one-hot encode and scale a single raw input record.

    Steps:
      1. Load the cleaned reference records once (``method='drop'``).
      2. Read the 13 features named by ``feature_map()[1..13]`` from
         ``inputX``; the string ``'?'`` marks a missing value.
      3. Fill every missing value with the reference column mean, then
         refine each one with ``arrayKNN`` using the other 12 features.
      4. One-hot encode the record the same way the reference rows are
         encoded, append it to the encoded reference matrix and
         column-normalise the whole matrix with ``norm='max'``.

    Args:
        inputX: Object indexable by the feature names in ``feature_map()``.

    Returns:
        The scaled, encoded input as a 2-D array with a single row.

    NOTE(review): column means are looked up by position, so this assumes
    the reference records list their features (after dropping ``id`` and
    ``target``) in the same order as ``feature_map()`` — confirm.
    """

    def _encode_feature(name, value):
        # Categorical columns expand to a one-hot list; anything else is
        # passed through as a single-element list.
        if name == 'thal':
            width, hot = 3, (0 if value == 3 else 1 if value == 6 else 2)
        elif name == 'pain_type':
            width, hot = 4, int(value) - 1
        elif name == 'electrocardiographic':
            width, hot = 4, int(value)
        else:
            return [value]
        encoded = [0.0] * width
        encoded[hot] = 1.0
        return encoded

    def _encode_record(record, skipped=()):
        # Encode every key of `record` except those listed in `skipped`.
        encoded = []
        for name in record:
            if name not in skipped:
                encoded.extend(_encode_feature(name, record[name]))
        return encoded

    non_features = ('id', 'target')

    # Materialise once: the same records feed both the raw matrix used for
    # imputation and the encoded matrix used for scaling.
    records = list(get_cleaned_data_from_DB(method='drop'))

    raw_matrix = np.array([
        [record[name] for name in record if name not in non_features]
        for record in records
    ])
    mean_vals = np.mean(raw_matrix, axis=0)

    key_map = feature_map()
    input_list = []
    for position in range(1, 14):
        raw_value = inputX[key_map[position]]
        # NOTE(review): only a numeric 0 (or '?') triggers the 1.0
        # substitution for feature 10; the string '0' falls through to
        # float() and stays 0.0 — confirm that is intended.
        if position == 10 and (raw_value == 0 or raw_value == '?'):
            input_list.append(1.0)
        elif raw_value != '?':
            input_list.append(float(raw_value))
        else:
            input_list.append(raw_value)

    # First pass: plug column means into every gap so that each KNN query
    # below has a complete set of neighbouring features.
    missing_index = []
    for column, value in enumerate(input_list):
        if value == '?':
            input_list[column] = mean_vals[column]
            missing_index.append(column)

    # Second pass: predict each missing feature from the remaining ones.
    for column in missing_index:
        other_columns = np.delete(raw_matrix, column, axis=1)
        known_values = raw_matrix[:, column]
        query = np.array(
            input_list[:column] + input_list[column + 1:]).reshape(1, -1)
        input_list[column] = arrayKNN(other_columns, known_values, query)[0]

    cleandict = {
        key_map[position]: input_list[position - 1]
        for position in range(1, 14)
    }
    encoded_input = np.array(_encode_record(cleandict))

    encoded_reference = np.array(
        [_encode_record(record, skipped=non_features) for record in records])

    # Scale the input together with the reference rows so it is divided by
    # the same column norms, then return just the appended row.
    stacked = np.concatenate(
        (encoded_reference, encoded_input.reshape(1, -1)), axis=0)
    stacked = normalize(stacked, axis=0, norm='max')
    return stacked[-1, :].reshape(1, -1)