# Inspect the DecisionTreeClassifier API, then fit a depth-limited tree on iris
# and dump its structure both graphically and as text.
help(DecisionTreeClassifier)
''' criterion : {"gini", "entropy"}, default="gini" max_depth : int, default=None # 트리 깊이=> 크면클수록 분류정확도 좋은(but, 오버피팅) min_samples_split : int or float, default=2 # 가지치기 '''
dtc = DecisionTreeClassifier(criterion='gini', random_state=123, max_depth=3)
model = dtc.fit(x_train, y_train)
# Visualize the fitted tree model
tree.plot_tree(model)  #@@1
# Text rendering of the same tree (feature_N thresholds per node)
print(export_text(model))
''' |--- feature_2 <= 2.45 : 3번 칼럼 분류조건(왼쪽노드) | |--- class: 0 -> 'setosa' 100% 분류 |--- feature_2 > 2.45 : 3번 칼럼 분류조건(오른쪽노드) | |--- feature_2 <= 4.75 | | |--- class: 1 | |--- feature_2 > 4.75 | | |--- feature_3 <= 1.75 | | | |--- class: 1 | | |--- feature_3 > 1.75 | | | |--- class: 2 '''
# Human-readable feature names for the columns above
names = iris.feature_names
'''
# Tally, over a window of sampled decision trees, how often each repository
# attribute ends up with a non-zero feature importance.
starting_index = 14
xAttributes = [
    'Developers', 'Commit #', 'Closed Issues', 'Releases', 'Tags',
    'Open Issues', 'Duration', 'Stars', 'Forks', 'Watchers',
]
counts = {attr: 0 for attr in xAttributes}
for index in range(starting_index, starting_index + sample_count):
    combined_df, myTree = getDecisionTree(index)
    print(f'index = {index}')
    # Weighted text dump of this sample's tree
    print(export_text(myTree, feature_names=xAttributes, show_weights=True))
    # Count every attribute the tree actually used (importance != 0)
    for feature_name, imp in zip(xAttributes, myTree.feature_importances_):
        if imp != 0:
            counts[feature_name] = counts.get(feature_name, 0) + 1

print("Attribute wise counts for the sample decision trees")
# Report attributes from most- to least-used
for name, used in sorted(counts.items(), key=lambda kv: kv[1], reverse=True):
    print(f'{name} = {used}')
def determine_insect_size(arealist, thresh):
    """Decide whether the insects detected in an image are "big" or "small".

    Trains a decision tree on a labelled CSV of rectangle statistics, predicts
    on the current image's statistics, cross-checks with a hand-tuned
    threshold vote, and dispatches to the big- or small-insect pipeline.

    Parameters:
        arealist -- list of contour/rectangle areas detected in the image
        thresh   -- thresholded image (array); only its shape is read

    NOTE(review): relies on module-level names defined elsewhere in the file
    (`rects`, `filename`, `statistics`, `get_overlap`, the analyse/draw/save
    helpers) — confirm they are in scope at call time.
    """
    from sklearn.tree import DecisionTreeClassifier  # Import Decision Tree Classifier
    from sklearn.model_selection import train_test_split  # Import train_test_split function
    from sklearn import metrics  # Import scikit-learn metrics module for accuracy calculation
    # Fix: sklearn.tree.export was removed in scikit-learn 0.24; export_text
    # lives directly in sklearn.tree.
    from sklearn.tree import export_text
    import pandas as pd

    ### Load csv ###
    col_names = [
        'No. of Rects', 'Max Area', 'Area Range', 'Standard Deviation',
        'CLASSIFICATION'
    ]
    insectcsv = pd.read_csv("EdgeDetection_InsectSize_DT_Train.csv",
                            header=None,
                            names=col_names)
    feature_cols = [
        'No. of Rects', 'Max Area', 'Area Range', 'Standard Deviation'
    ]
    X = insectcsv[feature_cols]
    y = insectcsv.CLASSIFICATION
    # split into 0.9 train and 0.1 test (comment fixed: test_size=0.1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=1)

    clf = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    # Train Decision Tree Classifer
    clf = clf.fit(X_train, y_train)

    imagearea = thresh.shape[0] * thresh.shape[1]  # currently unused downstream
    arealist = sorted(arealist)
    # Percentile cut points into the sorted area list.
    maxarealistrange = int(len(arealist) * 0.9)
    sixtyarealistrange = int(len(arealist) * 0.6)
    fourtyarealistrange = int(len(arealist) * 0.3)
    minarealistrange = int(len(arealist) * 0.1)
    overlaplist, overlappedrects = get_overlap(rects)
    # (Removed a dead loop that built a numofrects list which was immediately
    # shadowed by this assignment.)
    numofrects = len(rects)
    numofoverlaps = len(overlappedrects)
    differenceinoverlap = numofrects - numofoverlaps
    maxarea = int(statistics.mean(arealist[maxarealistrange:]))
    midarea = int(
        statistics.mean(arealist[fourtyarealistrange:sixtyarealistrange]))
    minarea = int(statistics.mean(arealist[:minarealistrange]))
    arearange = maxarea - minarea
    stdarea = int(statistics.stdev(arealist))

    testing_data = []
    testing_data.append((numofrects, maxarea, arearange, stdarea))
    # Predict the response for the current image's statistics
    y_pred = clf.predict(testing_data)
    if y_pred == 1:
        print("DT Prediction: Big Insects")
    else:
        print("DT Prediction: Small Insects")
    r = export_text(clf, feature_names=feature_cols)
    print(r)
    print(testing_data)

    # Hand-tuned majority vote (3-of-4 thresholds) as a sanity check on the
    # tree's prediction; this vote, not y_pred, drives the dispatch below.
    prediction = []
    if numofrects <= 350:
        prediction.append(1)
    else:
        prediction.append(0)
    if maxarea >= 56000:
        prediction.append(1)
    else:
        prediction.append(0)
    if arearange >= 51000:
        prediction.append(1)
    else:
        prediction.append(0)
    if stdarea >= 20000:
        prediction.append(1)
    else:
        prediction.append(0)
    print(prediction)
    prediction_final = sum(prediction)
    if (prediction_final >= 3):
        print("Processing Big Insects")
        removedoverlappedrects_big = remove_overlapping_rectangles(
            overlaplist, rects)
        bigrects_analysed = analyse_areas_biginsects(
            removedoverlappedrects_big)
        rects_bi, numofrects_bi = draw_rectangles_biginsects(bigrects_analysed)
        save_coordinates_to_xml(filename, numofrects_bi, rects_bi)
    else:
        print("Processing Small Insects")
        bigrectsremoved = analyse_areas_smallinsects(rects)
        mergedrects = merge_rectangles(bigrectsremoved)
        overlaplist_small, overlappedrects_small = get_overlap(mergedrects)
        removedoverlappedrects_small = remove_overlapping_rectangles(
            overlaplist_small, mergedrects)
        rects_si, numofrects_si = draw_rectangles_smallinsects(
            removedoverlappedrects_small)
        save_coordinates_to_xml(filename, numofrects_si, rects_si)
# Quick decision-tree fit on the already-prepared dataframe `df` / labels `y`,
# followed by unrelated setup for a new dataset download.
print(y)
display(df)
from sklearn import tree
from sklearn.preprocessing import OneHotEncoder
# Fix: sklearn.tree.export was removed in scikit-learn 0.24; export_text lives
# directly in sklearn.tree.
from sklearn.tree import export_text
#enc = OneHotEncoder(handle_unknown='ignore')
#display(df)
clf = tree.DecisionTreeClassifier(max_depth=5)
clf = clf.fit(df, y)
# Plot the already-fitted tree (the original redundantly refit inside the
# plot_tree call).
tree.plot_tree(clf)
print(export_text(clf))
#dotfile = open("dt.dot", 'w')
#tree.export_graphviz(clf, out_file=dotfile, feature_names=df.columns)
#dotfile.close()

#################################################
import numpy as np
import pandas as pd
import io
from IPython.display import clear_output
from matplotlib import pyplot as plt
import requests

url = 'https://raw.githubusercontent.com/SergeGuillemart/Hackathon-BigData-2019/master/ressources/Traite/Finished/Total3.csv'
### Decision Tree using Scikit
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
# Fix: sklearn.tree.export was removed in scikit-learn 0.24; export_text lives
# directly in sklearn.tree.
from sklearn.tree import export_text

df = pd.read_csv('Iris.csv')
df = df.drop("Id", axis=1)
df = df.rename(columns={"species": "label"})


# Train test split
def train_test_split(df, test_size=0.8, random_state=None):
    """Split *df* into (train, test) frames, both sorted by index.

    NOTE: despite its name, ``test_size`` is the fraction sampled into the
    TRAIN set (it is passed to ``DataFrame.sample(frac=...)``); the remaining
    rows form the test set. The name is kept for caller compatibility.
    """
    train_df = df.sample(frac=test_size, random_state=random_state)
    test_df = df[~df.index.isin(train_df.index)]
    return train_df.sort_index(), test_df.sort_index()


train_df, test_df = train_test_split(df, 0.8, 100)
decision_tree = DecisionTreeClassifier(random_state=0, max_depth=3)
# Last column is the label; everything before it is a feature.
decision_tree = decision_tree.fit(train_df.iloc[:, :-1], train_df.iloc[:, -1])
formated_tree = export_text(decision_tree,
                            feature_names=df.iloc[:, :-1].columns.tolist())
print(formated_tree)

#### Evaluate (accuracy on the held-out rows, as a percentage)
decision_tree.score(test_df.iloc[:, :-1], test_df.iloc[:, -1]) * 100
def generar_arbol(codigo, profundidad=5, estadisticas=True):
    """Generate a decision-tree classifier for one air-quality station.

    Returns the fitted tree and its accuracy score on the held-out
    validation split.

    Parameters:
        codigo -- identifier code of the air-quality station
        profundidad -- maximum depth of the tree (default 5)
        estadisticas -- True to also export a statistics file, False to
            skip it (default True)
    """
    path = "../data/entrenamiento/" + str(codigo) + "/"
    # Input file layout: line 1 = feature headers, line 2 = labels,
    # line 3 = ',,'-separated attribute rows.
    f = open(path + "data_tree_" + str(codigo) + ".txt")
    cab = str(f.readline())[:-1]
    cab = cab.replace("'", "")
    cab = cab.replace(" ", "")
    cabeceras = cab[1:-1].split(",")
    l_etiquetas = str(f.readline())[:-1].split(",")
    atributos = str(f.readline())[:-1].split(",,")
    f.close()
    l_atributos = []
    for atributo in atributos:
        l_atributos.append(atributo.split(","))
    l_etiquetas = np.array(l_etiquetas)
    l_atributos = np.array(l_atributos)
    etiquetas = set(l_etiquetas)
    clases = sorted(list(etiquetas))
    # Split the data we will use for training and for validation:
    # validation data (0.25) / training data (0.75)
    X_train, X_test, y_train, y_test = train_test_split(l_atributos,
                                                        l_etiquetas,
                                                        test_size=0.25,
                                                        random_state=0)
    # Standardise the features (fit the scaler on the training split only)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    # Build the tree with the requested depth
    arbol = tree.DecisionTreeClassifier(max_depth=profundidad,
                                        criterion='entropy',
                                        random_state=0)
    # Train the tree
    arbol.fit(X_train, y_train)
    # Export the tree as text
    r = export_text(arbol, feature_names=cabeceras[:-1])
    f = open(path + "export_tree_text_" + str(codigo) + ".txt", "w")
    f.write(r)
    f.close()
    # Export the tree structure as a .dot file
    export_graphviz(arbol,
                    out_file=path + 'export_tree_' + str(codigo) + '.dot',
                    class_names=clases,
                    feature_names=cabeceras[:-1],
                    impurity=False,
                    filled=True)
    # Predict the results for the test block
    y_pred = arbol.predict(X_test)
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    # Reports
    report = classification_report(y_test, y_pred)
    if estadisticas == True:
        f = open(path + "export_tree_statistics_" + str(codigo) + ".txt", "w")
        f.write("************************ CLASS *************************\n")
        for i in range(len(clases)):
            f.write(str(clases[i]) + "\n")
        f.write("\n\n")
        f.write("********************* MAX DEPTH ************************\n")
        f.write(str(profundidad) + "\n")
        f.write("\n\n")
        f.write("***************** FEATURE IMPORTANCES ******************\n")
        featur_imp = arbol.feature_importances_
        for i in range(len(cabeceras[:-1])):
            f.write(str(cabeceras[i]) + ": " + str(featur_imp[i]) + "\n")
        f.write("\n\n")
        f.write("************************ SCORE *************************\n")
        f.write("With Test data: " + str(arbol.score(X_test, y_test)) + "\n")
        f.write("With Training data: " + str(arbol.score(X_train, y_train)) +
                "\n")
        f.write("\n\n")
        f.write("****************** CONFUSION MATRIX ********************\n")
        # Hand-formatted confusion matrix for 2, 3 or 4 classes.
        if len(clases) == 2:
            f.write("\t0\t1\n")
            f.write("---------------------\n")
            f.write("0 |\t" + str(cm[0][0]) + "\t" + str(cm[0][1]) + "\n")
            f.write("1 |\t" + str(cm[1][0]) + "\t" + str(cm[1][1]) + "\n")
        if len(clases) == 3:
            f.write("\t0\t1\t2\n")
            f.write("--------------------------------\n")
            f.write("0 |\t" + str(cm[0][0]) + "\t" + str(cm[0][1]) + "\t" +
                    str(cm[0][2]) + "\n")
            f.write("1 |\t" + str(cm[1][0]) + "\t" + str(cm[1][1]) + "\t" +
                    str(cm[1][2]) + "\n")
            f.write("2 |\t" + str(cm[2][0]) + "\t" + str(cm[2][1]) + "\t" +
                    str(cm[2][2]) + "\n")
        if len(clases) == 4:
            f.write("\t0\t1\t2\t3\n")
            f.write("----------------------------------------------\n")
            f.write("0 |\t" + str(cm[0][0]) + "\t" + str(cm[0][1]) + "\t" +
                    str(cm[0][2]) + "\t" + str(cm[0][3]) + "\n")
            f.write("1 |\t" + str(cm[1][0]) + "\t" + str(cm[1][1]) + "\t" +
                    str(cm[1][2]) + "\t" + str(cm[1][3]) + "\n")
            f.write("2 |\t" + str(cm[2][0]) + "\t" + str(cm[2][1]) + "\t" +
                    str(cm[2][2]) + "\t" + str(cm[2][3]) + "\n")
            f.write("3 |\t" + str(cm[3][0]) + "\t" + str(cm[3][1]) + "\t" +
                    str(cm[3][2]) + "\t" + str(cm[3][3]) + "\n")
        f.write("\n\n")
        f.write("************************ REPORT ***********************\n")
        f.write(report)
        f.close()
    print(str(codigo) + ": tree created")
    return (arbol, arbol.score(X_test, y_test))
# Load train/validation splits and sweep decision-tree depth, reporting
# training/validation accuracy and the validation confusion matrix per depth.
train_loader = DataLoader(training, batch_size=train_size, shuffle=True)
val_loader = DataLoader(val, batch_size=val_size, shuffle=False)
print("Dataset loaded!")
trees = []
for i, (feature, label) in enumerate(train_loader):
    for max_depth in (2, 3, 5, 7, 10):
        print("Max depth: {}".format(max_depth))
        clf = tree.DecisionTreeClassifier(max_depth=max_depth)
        clf = clf.fit(np.array(feature), np.array(label))
        training_predictions = clf.predict(np.array(feature))
        training_accuracy = metrics.accuracy_score(np.array(label),
                                                   training_predictions)
        # batch_size=val_size, so this loop runs once over the whole val set
        for j, (val_feature, val_label) in enumerate(val_loader):
            validation_predictions = clf.predict(np.array(val_feature))
            validation_accuracy = metrics.accuracy_score(
                np.array(val_label), validation_predictions)
            confusion_matrix = str(
                metrics.confusion_matrix(np.array(val_label),
                                         validation_predictions))
        print(training_accuracy)
        print(validation_accuracy)
        print(confusion_matrix)
        # Fix: removed leftover `import pdb; pdb.set_trace()` breakpoint — it
        # halted every iteration and hangs non-interactive runs.
        r = export_text(clf, feature_names=('max_speed', 'max_dec', 'max_acc',
                                            'max_acc_total'))
        print(r)

# model = SafetyFeatureModel(4, 2)
# weight = torch.tensor([1, 3], dtype=torch.float)
# if torch.cuda.is_available():
#     model.cuda()
#     weight = weight.cuda()
# loss_function = nn.CrossEntropyLoss(weight)
# optimizer = optim.SGD(model.parameters(), lr=0.00001)
# train(model, train_loader, val_loader, loss_function, optimizer, 500)
# Names for the two derived (boolean-combination) features appended to each row.
extended_features_names = [
    '(!!data.breathingProblems or !!data.fever or !!data.cough)',
    '(!!data.breathingProblems and !!data.fever and !!data.cough)'
]


def compute_extended_features(features):
    """Derive OR/AND combinations of the three symptom flags (indices 1-3)."""
    return [
        min(1, features[1] + features[2] + features[3]),  # logical OR
        features[1] * features[2] * features[3]  # logical AND
    ]


data = []
classes = []
with open('./tree.tsv', 'r') as f:
    for line in f:
        split = line.rstrip().split('\t')
        # "Oui" (yes) -> 1, anything else -> 0; last column is the class label.
        features = [1 if val == "Oui" else 0 for val in split[:-1]]
        extended_features = compute_extended_features(features)
        data.append(features + extended_features)
        classes.append(classes_indexes[split[-1]])

clf = tree.DecisionTreeClassifier(criterion='gini')
# Fix: the original statement was truncated to `clf = clf.`, so the tree was
# never trained before export_text. Fit on the assembled data/labels.
clf = clf.fit(data, classes)

# Render the rules as pseudo-code over the boolean features.
res = export_text(clf,
                  feature_names=base_features_names + extended_features_names)
res = res.replace('---', 'if ').replace(' > 0.50', ' is true:').replace(
    '<= 0.50', 'is false:')
print(res)
# Hold out the rows listed in test_idx; train on everything else.
train_target = np.delete(iris.target, test_idx)
train_data = np.delete(iris.data, test_idx, axis=0)
# test data
test_target = iris.target[test_idx]
test_data = iris.data[test_idx]

classifier = tree.DecisionTreeClassifier()
classifier.fit(train_data, train_target)

# Expected labels, then the classifier's predictions on the held-out rows.
print(test_target)
print(classifier.predict(test_data))

#dot_data = tree.export_graphviz(classifier, out_file=None,
#                                feature_names=iris.feature_names,
#                                class_names=iris.target_names,
#                                filled=True, rounded=True,
#                                special_characters=True)
#graph = graphviz.Source(dot_data)
#graph.render("iris")

# Show the first three held-out samples with their true labels.
print(iris.feature_names, iris.target_names)
for sample_idx in range(3):
    print(test_data[sample_idx], test_target[sample_idx])

r = export_text(classifier, feature_names=iris['feature_names'])
print("\n\n Text Decision Tree\n ******************\n")
print(r)
# Fit the classifier and predict on the held-out set.
clf = DecisionTreeClassifier().fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Inline matplotlib rendering of the fitted tree.
tree.plot_tree(clf)

# Plain graphviz export, rendered to a PDF on disk.
dot_data = tree.export_graphviz(clf, out_file=None)
graph = graphviz.Source(dot_data)
graph.render("D:/diabeteas")

# Colored graphviz export with feature and class labels.
# (Instead of X_train.columns, feature_cols could be used directly.)
dot_data = tree.export_graphviz(clf,
                                out_file=None,
                                feature_names=X_train.columns,
                                class_names=pima.label,
                                filled=True,
                                rounded=True,
                                special_characters=True)
graph = graphviz.Source(dot_data)
graph

# Plain-text rule listing of the same tree.
r = export_text(clf, feature_names=feature_cols)
print(r)
def trea(X, y):
    """Fit a depth-2 decision tree on (X, y); print and return its text rules.

    Parameters:
        X -- feature matrix
        y -- target labels

    Returns:
        str -- the export_text rendering of the fitted tree. (The original
        discarded this value, making the function unusable programmatically;
        returning it is backward compatible.)
    """
    decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)
    decision_tree = decision_tree.fit(X, y)
    r = export_text(decision_tree)
    print(r)
    return r
def classify_reviews(self):
    """
    Classifies each review into either iOS 13-related or not iOS 13-related
    using a naive REGEX approach and a more sophisticated machine learning
    approach.
    """
    df = pd.read_csv(DataProcessor.dir_name +
                     "/../../data_files/processed_dataset.csv")
    df = df.fillna(value={'document': " "})
    df["re_flag"] = None  # regular expression flag
    df["ml_flag"] = None  # machine learning flag

    # regular expression approach for explicit iOS 13 references
    # (common transpositions of "ios" are matched on either side of "13")
    regex_string = r"((ios|iso|ois|osi|sio|soi|os)\s*13)|(13\s*(ios|iso|ois|osi|sio|soi|os))"
    df["re_flag"] = df.document.str.contains(regex_string, regex=True)

    # machine learning approach for implicit iOS 13 references
    # create training data set: a random sample plus all regex-positive rows
    data = df
    sample_size = 21700
    data = pd.concat([data.sample(sample_size),
                      data[data["re_flag"] == True]
                      ]).drop_duplicates().reset_index(drop=True)
    data = shuffle(data)
    # create tfidf vector as feature vector and another class label vector
    tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    x_tfidf_vector = tfidf_vectorizer.fit_transform(data["document"])
    y_vector = np.where(data['re_flag'] == True, 1, 0)
    # create the same vectors for the entire dataset
    data = shuffle(df)
    data = data.fillna(value={'document': " "})
    x = tfidf_vectorizer.transform(data["document"])
    y = np.where(data['re_flag'] == True, 1, 0)
    # split data into train and test set
    x_train, x_test, y_train, y_test = train_test_split(x_tfidf_vector,
                                                        y_vector,
                                                        test_size=0.2,
                                                        shuffle=False)
    # train multinomial naive Bayes model
    nb = MultinomialNB().fit(x_train, y_train)
    y_predicted = nb.predict(x_test)
    print("MultinomialNB")
    print(classification_report(y_test, y_predicted))
    print(pd.crosstab(y_test, y_predicted, rownames=['True'],
                      colnames=['Predicted'], margins=True))
    print(accuracy_score(y_test, y_predicted))
    y_hats = nb.predict(x)
    print(classification_report(y, y_hats))
    print(pd.crosstab(y, y_hats, rownames=['True'], colnames=['Predicted'],
                      margins=True))
    print(accuracy_score(y, y_hats))
    # train logistic regression model
    lr = LogisticRegression(solver='lbfgs').fit(x_train, y_train)
    y_predicted = lr.predict(x_test)
    print("LogisticRegression")
    print(classification_report(y_test, y_predicted))
    print(pd.crosstab(y_test, y_predicted, rownames=['True'],
                      colnames=['Predicted'], margins=True))
    print(accuracy_score(y_test, y_predicted))
    y_hats = lr.predict(x)
    print(classification_report(y, y_hats))
    print(pd.crosstab(y, y_hats, rownames=['True'], colnames=['Predicted'],
                      margins=True))
    print(accuracy_score(y, y_hats))
    # train random forest model
    rf = RandomForestClassifier(n_estimators=50).fit(x_train, y_train)
    y_predicted = rf.predict(x_test)
    print("RandomForestClassifier")
    print(classification_report(y_test, y_predicted))
    print(pd.crosstab(y_test, y_predicted, rownames=['True'],
                      colnames=['Predicted'], margins=True))
    print(accuracy_score(y_test, y_predicted))
    # NOTE: y_hats from this last model is what gets stored as ml_flag below
    y_hats = rf.predict(x)
    print(classification_report(y, y_hats))
    print(pd.crosstab(y, y_hats, rownames=['True'], colnames=['Predicted'],
                      margins=True))
    print(accuracy_score(y, y_hats))
    feature_names = tfidf_vectorizer.get_feature_names()
    # print randomly choosen decision tree of random forest model
    estimator = rf.estimators_[random.randrange(0, 50)]
    tree_rules = export_text(estimator, feature_names=feature_names)
    print(tree_rules)
    # conduct five-fold cross validation
    models = [
        RandomForestClassifier(n_estimators=50),
        MultinomialNB(),
        LogisticRegression(),
    ]
    cv_fold = 5
    cv_df = pd.DataFrame(index=range(cv_fold * len(models)))
    entries = []
    for model in models:
        model_name = model.__class__.__name__
        accuracies = cross_val_score(model, x_tfidf_vector, y_vector,
                                     scoring='accuracy', cv=cv_fold)
        for fold_idx, accuracy in enumerate(accuracies):
            entries.append((model_name, fold_idx, accuracy))
    cv_df = pd.DataFrame(entries,
                         columns=['model_name', 'fold_idx', 'accuracy'])
    # visualization of results
    # print boxplots with model accuracies
    sns.set(rc={'figure.figsize': (14, 6)})
    sns.set_style('whitegrid', {'font.family': 'serif',
                                'font.serif': 'Times New Roman'})
    plot = sns.boxplot(x='accuracy', y='model_name', color="0.90",
                       data=cv_df,
                       order=["MultinomialNB", "LogisticRegression",
                              "RandomForestClassifier"],
                       orient="h", linewidth=3)
    sns.swarmplot(x='accuracy', y='model_name', data=cv_df, size=10,
                  edgecolor="gray", color="black", linewidth=1,
                  order=["MultinomialNB", "LogisticRegression",
                         "RandomForestClassifier"], orient="h")
    plot.set_xlabel("Accuracy", fontsize=25)
    plot.set_ylabel("Model name", fontsize=25)
    plot.tick_params(labelsize=20)
    # store predictions of the two classification approaches
    data.loc[:, "ml_flag"] = y_hats  # machine learning predictions
    data["re_flag"] = data["re_flag"].astype(int)  # regex predictions
    data = data.reset_index(drop=True)
    data.to_csv(DataProcessor.dir_name +
                "/../../data_files/processed_dataset.csv",
                encoding="utf-8", index=False)
    return df
#noBlanks = df[df.apply(lambda x: x.count(), axis=1) > 44] x = df[xCols] y = df["Stuck"] clf = tree.DecisionTreeClassifier(max_depth=3) clf = clf.fit(x, y) fig, ax = plt.subplots() fig.set_figheight(12) fig.set_figwidth(15) tree.plot_tree(clf, ax=ax) #dot_data = tree.export_graphviz(clf, out_file=None) #graph = graphviz.Source(dot_data) r = export_text(clf, feature_names=xCols) print(r) print(confusion_matrix(y, clf.predict(x))) # Compute ROC curve and ROC area for each class fpr = [] tpr = [] roc_auc = dict() plt.figure() # ROC curve for different max depths for tree for i in range(2,10): clf = tree.DecisionTreeClassifier(max_depth=i) clf = clf.fit(x, y)
# Probe the current classifier on a single hand-made point.
clf.predict([[2., 2.]])
clf.predict_proba([[2., 2.]])

# Refit on the full iris data (array form).
X, y = load_iris(return_X_y=True)
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X, y)
# NOTE(review): this refits on `iris.data` inside the plot call, relying on an
# `iris` object defined before this chunk — confirm it matches X, y above.
tree.plot_tree(clf.fit(iris.data, iris.target))

# Graphviz exports: plain, then colored with feature/class names.
dot_data = tree.export_graphviz(clf, out_file=None)
graph = graphviz.Source(dot_data)
graph.render("iris")
dot_data = tree.export_graphviz(clf,
                                out_file=None,
                                feature_names=iris.feature_names,
                                class_names=iris.target_names,
                                filled=True,
                                rounded=True,
                                special_characters=True)
graph = graphviz.Source(dot_data)
graph  # displayed inline when run in a notebook

# Depth-limited tree, exported as text rules.
iris = load_iris()
decision_tree = DecisionTreeClassifier(random_state=0, max_depth=2)
decision_tree = decision_tree.fit(iris.data, iris.target)
r = export_text(decision_tree, feature_names=iris['feature_names'])
print(r)
# Test-set accuracy of the fitted tree model.
print('Accuracy: %.2f%%' % (100.0 * tree_model.score(X_test, y_test)))

# decision boundary over the combined train + test data
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))
train_len = X_train.shape[0]
combined_len = X_combined.shape[0]
plt.figure(figsize=(3, 3), dpi=300)
# NOTE(review): this plots knn_model's regions inside the tree section —
# confirm tree_model was not intended here.
plot_decision_regions(X=X_combined,
                      y=y_combined,
                      classifier=knn_model,
                      test_idx=range(train_len, combined_len))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.tight_layout()
#plt.savefig('images/03_01.png', dpi=300)
plt.show()

# plot the tree
# Fix: sklearn.tree.export was removed in scikit-learn 0.24; export_text lives
# directly in sklearn.tree.
from sklearn.tree import export_text
print(export_text(tree_model, feature_names=list(X.columns)))

from sklearn.tree import plot_tree
plt.figure(figsize=(6, 6), dpi=300)
plot_tree(tree_model, filled=True)
plt.show()

# Attribute Importance
importances = pd.DataFrame({
    'feature': X_train.columns,
    'importance': np.round(tree_model.feature_importances_, 3)
})
importances = importances.sort_values('importance', ascending=False)
#set up matplot figure fig, ax = plt.subplots(figsize=(8, 8)) #plot the decision tree tree.plot_tree(clf_final, feature_names=list(x_train), filled=True) plt.savefig('{}/decision_tree_{}.png'.format(path, args.train), dpi=400) #save the figure #alternative way to plot the decision tree by manually entering the size and colour of the arrows # out = tree.plot_tree(clf_final, feature_names=list(x_train), filled = True) # for o in out: # arrow = o.arrow_patch # if arrow is not None: # arrow.set_edgecolor('black') # arrow.set_linewidth(3) #plot and output the decision tree as text tree_rules = export_text(clf_final, feature_names=list(X.columns)) print(tree_rules) #extract feature importances fi = pd.DataFrame({'feature': list(x_train.columns), 'importance': clf_final.feature_importances_}).\ sort_values('importance', ascending = False) print(fi.head()) #set up matplot figure fig = plt.figure(figsize=(8, 8)) #plot bar chart showing feature importances sns.barplot(x=fi.feature, y=fi.importance) #add labels to the graph plt.xlabel('Features') #rename the x-axis title