def main(path, predict_column):
    """Fit the project's ID3 and a scikit-learn decision tree on a CSV file.

    The CSV at ``path`` is loaded with pandas. Every column except
    ``predict_column`` and the column literally named ``'Name'`` is used as a
    feature; ``predict_column`` is the target. The value returned by
    ``ID3.fit`` is printed under the label "Entropies", then a
    ``DecisionTreeClassifier`` is fitted on the ``Switcher``-converted data
    and its score on that same data is printed.

    NOTE(review): SOURCE contains a second definition named ``main``; if both
    live in the same module, the later one rebinds the name - confirm.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        predict_column: Name of the column to use as the target.

    Returns:
        Always ``0``.
    """
    frame = pd.read_csv(path)

    # Collect the raw feature columns, skipping the target and 'Name'.
    X = []
    for name in frame.columns.values:
        if name != predict_column and name != 'Name':
            X.append(frame[name].values)
    y = frame[predict_column].values

    # Switcher.convert_to presumably maps raw values to something the
    # scikit-learn estimator accepts - TODO confirm against Switcher.
    converter = Switcher()
    converted_columns = [np.array(converter.convert_to(values)) for values in X]
    # Transposed so the per-column arrays become one row per sample.
    sk_X = np.array(converted_columns).T
    sk_y = np.array(converter.convert_to(y)).T

    id3_model = ID3()
    entropy = id3_model.fit(X, y)
    print(f'Entropies:\t\t\t{entropy}')

    sk_tree = DecisionTreeClassifier(random_state=1370)
    sk_tree.fit(sk_X, sk_y)
    # The score is computed on the very data the tree was fitted on.
    print(f'SkLearn Tree Decision accuracy:\t{sk_tree.score(sk_X, sk_y)}')
    return 0
def main(path, predict_column):
    """Report ID3 entropy and gains for a CSV, then fit a scikit-learn tree.

    The CSV at ``path`` is loaded with pandas. Every column except
    ``predict_column`` and the column literally named ``'Name'`` is used as a
    feature; ``predict_column`` is the target.

    The result of ``ID3.fit`` is indexed: element ``1`` is printed as the
    entropy of the target, and element ``0`` is iterated and printed (eight
    decimals each) as the per-feature information gains under a header row of
    feature names.

    The data is then converted with ``Switcher``, split 70/30 into train and
    test sets, and a ``DecisionTreeClassifier`` is fitted on the training
    part. Its score on the training part and its predictions for the test
    part are printed.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        predict_column: Name of the column to use as the target.

    Returns:
        Always ``0``.
    """
    df = pd.read_csv(path)

    # Compute the feature column list once so the data and the printed
    # header row can never disagree.
    feature_columns = [
        column for column in df.columns.values
        if column != predict_column and column != 'Name'
    ]
    X = [df[column].values for column in feature_columns]
    y = df[predict_column].values

    decisions = ID3()
    fitted = decisions.fit(X, y)
    print(f'Entropy of {predict_column}:\t\t\t{fitted[1]}\n')
    print('Information Gains of Xs\n')

    # Join with the separator directly. Joining on ' ' and then replacing
    # every ' ' would also split column names that contain spaces,
    # misaligning the header row against the gains row.
    separator = '\t | '
    print(separator.join(str(column) for column in feature_columns))
    print(separator.join(f'{gain:.8f}' for gain in fitted[0]))
    print('\n\n')

    # Switcher.convert_to presumably maps raw values to something the
    # scikit-learn estimator accepts - TODO confirm against Switcher.
    swtch = Switcher()
    # Transposed so the per-column arrays become one row per sample.
    sk_X = np.array([np.array(swtch.convert_to(column)) for column in X]).T
    sk_y = np.array(swtch.convert_to(y)).T

    # NOTE(review): no random_state is passed here, so the split (and thus
    # the output below) differs between runs even though the classifier
    # itself is seeded - confirm whether that is intended.
    x_train, x_test, y_train, y_test = train_test_split(
        sk_X, sk_y, test_size=0.3)

    sk_decisions = DecisionTreeClassifier(random_state=1370)
    sk_decisions.fit(x_train, y_train)
    # NOTE(review): this "accuracy" is measured on the training split, not on
    # the held-out x_test / y_test - confirm whether that is intended.
    print(
        f'SkLearn Tree Decision accuracy:\t{sk_decisions.score(x_train, y_train)}'
    )
    print(f'SkLearn Tree Decision prediction: {sk_decisions.predict(x_test)}')
    return 0