Example #1
def show_coefficients(classifier,
                      clf,
                      feature_names,
                      filename,
                      top_features=20):
    if classifier == "svml":
        coef = clf.coef_.ravel()
    elif classifier == "rf":
        coef = clf.feature_importances_
    elif classifier == "dtree":
        export_graphviz(clf,
                        out_file=(filename + '.dot'),
                        feature_names=feature_names)
        coef = clf.feature_importances_
    else:
        return
    top_positive_coefficients = np.argsort(coef)[-top_features:][::-1]

    #top_negative_coefficients = np.argsort(coef)[:top_features]
    #top_coefficients = np.hstack([top_negative_coefficients, top_positive_coefficients])
    feature_names = np.array(feature_names)
    print(
        list(
            zip(feature_names[top_positive_coefficients],
                sorted(coef, reverse=True))))
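A minimal usage sketch for show_coefficients, assuming numpy as np is imported, a binary problem (so coef_.ravel() matches the feature names one-to-one), and an illustrative dataset:
# Illustrative driver (assumes the function above and numpy as np are in scope).
from sklearn.datasets import load_breast_cancer
from sklearn.svm import LinearSVC

X, y = load_breast_cancer(return_X_y=True)
names = load_breast_cancer().feature_names
clf = LinearSVC(max_iter=10000).fit(X, y)
# filename is only used by the "dtree" branch, so any placeholder works here.
show_coefficients("svml", clf, names, "unused", top_features=5)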
Example #2
def export_tree(*data):
    '''
    Export the decision graph.
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    # Then run Graphviz's dot tool from the command line:
    # dot.exe -Tpdf F:/out -o F:/out.pdf generates the decision tree as a PDF,
    # or dot.exe -Tpng F:/out -o F:/out.png generates it as a PNG.
    # The -T option sets the output format and -o sets the output file name.
    export_graphviz(clf, out_file="F:/out")
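The same .dot file can also be rendered from Python with the graphviz package instead of calling dot.exe by hand; a minimal sketch, assuming the Graphviz binaries are on PATH:
import graphviz

with open("F:/out") as f:
    src = graphviz.Source(f.read())
# Writes F:/out_tree.pdf; cleanup=True removes the intermediate source copy.
src.render("F:/out_tree", format="pdf", cleanup=True)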
Example #3
def trainDecisionTree(X, y, max_depth=16, step_size=1):
    depths = range(1, max_depth+1, step_size)
    scores = np.empty(len(depths))

    def validatorDT(X, y, scores, depth, i):
        model = DecisionTreeClassifier(max_depth=depth, criterion='entropy', random_state=1)
        scores[i] = cross_val_score(model, X, y, cv=NUM_CV_FOLDS).mean()
    
    threads = []
    for i, d in enumerate(depths):
        threads.append(Thread(target=validatorDT, args=(X, y, scores, d, i)))
        threads[i].start()

    for t in threads:
        t.join()

    best_depth = depths[np.argmax(scores)]

    model = DecisionTreeClassifier(max_depth=best_depth, criterion='entropy', random_state=1)
    model = model.fit(X, y)

    dot_data = export_graphviz(model, out_file=None) 
    graph = graphviz.Source(dot_data)
    graph.render("decisionTree")

    return model, best_depth
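An illustrative driver for trainDecisionTree; NUM_CV_FOLDS is a module-level constant the function assumes, defined here for the sketch. Since CPython threads share the GIL, any speedup depends on the cross-validation releasing it in native code; cross_val_score(..., n_jobs=-1) is the more common parallelization route.
# Illustrative driver (assumes the imports the function above needs, plus the
# graphviz Python package and binaries for graph.render).
from sklearn.datasets import load_iris

NUM_CV_FOLDS = 5  # constant assumed by trainDecisionTree

X, y = load_iris(return_X_y=True)
model, best_depth = trainDecisionTree(X, y, max_depth=8)
print("best depth:", best_depth)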
Example #4
def tree_feature_importance_plot(X, y):
    regr = DecisionTreeRegressor()
    feat_labels = X.columns[:]
    regr.fit(X, y)
    # out_file=None makes export_graphviz return the dot source as a string
    out = export_graphviz(regr, out_file=None, feature_names=feat_labels)
    graph = pydotplus.graph_from_dot_data(out)
    graph.write_pdf('tree.pdf')
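An illustrative call; the function reads X.columns, so X must be a pandas DataFrame. The dataset here is an assumption for the sketch:
import pandas as pd
from sklearn.datasets import load_diabetes

data = load_diabetes()
X = pd.DataFrame(data.data, columns=data.feature_names)
# Writes tree.pdf; note an unpruned regression tree can be very large.
tree_feature_importance_plot(X, data.target)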
Example #5
def convert_decision_tree_to_ipython_image(clf, feature_names=None, class_names=None,
                                           image_filename=None, tmp_dir=None):
    dot_filename = mkstemp(suffix='.dot', dir=tmp_dir)[1]
    with open(dot_filename, "w") as out_file:
        export_graphviz(clf, out_file=out_file,
                        feature_names=feature_names,
                        class_names=class_names,
                        filled=True, rounded=True,
                        special_characters=True)

    from IPython.display import Image

    image_filename = image_filename or ('%s.png' % dot_filename)

    subprocess.call(('dot -Tpng -o %s %s' %
                     (image_filename, dot_filename)).split(' '))
    image = Image(filename=image_filename)
    os.remove(dot_filename)
    return image
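A notebook-style usage sketch for the helper above, assuming its imports (mkstemp, subprocess, os, export_graphviz) are in place and Graphviz's dot is on PATH; the dataset is illustrative:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=3).fit(iris.data, iris.target)
# The returned Image renders inline when it is the last expression in a cell.
convert_decision_tree_to_ipython_image(clf,
                                       feature_names=iris.feature_names,
                                       class_names=list(iris.target_names),
                                       image_filename="iris_tree.png")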
Example #6
def show_coefficients(classifier, clf, feature_names, top_features=20):
    if classifier == "svml":
        coef = clf.coef_.ravel()
    elif classifier == "rf":
        if isinstance(clf, Pipeline):
            clf = clf.named_steps['clf']
        coef = clf.feature_importances_
    elif classifier == "dt":
        export_graphviz(clf, out_file='tree.dot', feature_names=feature_names)
        coef = clf.feature_importances_
    elif classifier == 'xgb':
        coef = clf.feature_importances_
    else:
        return
    top_positive_coefficients = np.argsort(coef)[-top_features:][::-1]

    # top_negative_coefficients = np.argsort(coef)[:top_features]
    # top_coefficients = np.hstack([top_negative_coefficients, top_positive_coefficients])

    feature_names = np.array(feature_names)
    print(
        list(
            zip(feature_names[top_positive_coefficients],
                sorted(coef, reverse=True))))
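The "rf" branch above unwraps a Pipeline whose final step must be registered under the name 'clf'; a sketch of that calling convention, with an illustrative dataset:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)
# The step name 'clf' is what the unwrapping above looks up.
pipe = Pipeline([("scale", StandardScaler()),
                 ("clf", RandomForestClassifier(random_state=0))])
pipe.fit(X, y)
show_coefficients("rf", pipe, load_breast_cancer().feature_names, top_features=5)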
Example #7
def dt_run_plot(importance_matrix, group_feature, result, fea_number):

    features_top = importance_matrix.mean().sort_values(
        ascending=False)[:fea_number].index

    feature_matrix = group_feature[features_top]

    #    class_weights = class_weight.compute_class_weight('balanced', np.unique(results), results)

    cla = DecisionTreeClassifier(max_depth=4,
                                 max_features=None,
                                 max_leaf_nodes=None,
                                 min_samples_leaf=0.05)
    #                         , class_weight ={0:class_weights[0], 1:class_weights[1]})

    #    print(class_weights[0], class_weights[1])

    cla = cla.fit(feature_matrix, result)

    dot_data = StringIO()
    out = export_graphviz(cla,
                          out_file=dot_data,
                          filled=True,
                          rounded=True,
                          impurity=True,
                          special_characters=True,
                          proportion=True,
                          feature_names=features_top)

    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

    Image(graph.create_png())  # only renders when it is the last expression in a notebook cell

    # `names` must be defined in the enclosing scope; it is not a parameter of this function.
    graph.write_pdf(
        r"C:\Users\s1883483\Desktop\2018 Rotman Datathon\output\dt_" + names +
        ".pdf")
Example #8
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])

from sklearn.tree import DecisionTreeClassifier, export

classificador = DecisionTreeClassifier(criterion="entropy")

# Train the algorithm - building the decision tree
classificador.fit(previsores, classe)

# Print the importance of each attribute
print(classificador.feature_importances_)

export.export_graphviz(classificador,
                       out_file="arvore.dot",
                       feature_names=[
                           "historia",
                           "divida",
                           "garantia",
                           "renda",
                       ],
                       class_names=["alto", "moderado", "baixo"],
                       filled=True,
                       leaves_parallel=True)

# good history, high debt, no collateral, income > 35
# bad history, high debt, adequate collateral, income < 15
# predict = run the algorithm
resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(resultado)
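To inspect the exported arvore.dot without the dot command line, one option is pydotplus; a minimal sketch, assuming Graphviz is installed:
import pydotplus

graph = pydotplus.graph_from_dot_file("arvore.dot")
graph.write_png("arvore.png")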
Example #9
#%%[markdown]
# # Decision Trees
# The decision tree classifier works on the basis of information theory.
# First, the information gain of each predictor variable is estimated.

#%%
# Decision tree algorithms for predicting data
# decision tree classifier
from sklearn.tree import DecisionTreeClassifier, export
from db_loaders.risco_credito import previsores, risco

# Decision tree classifier
modelo = DecisionTreeClassifier(criterion='entropy')
modelo.fit(previsores, risco)

export.export_graphviz(
    modelo,
    out_file="assets/arvore.dot",
    feature_names=["Histórico", "Dívida", "Garantias", "Renda"],
    class_names=["Alto", "Moderado", "Baixo"],
    filled=True,
    leaves_parallel=True)

resultado = modelo.predict([[0, 0, 1, 2], [2, 0, 0, 0]])

#%%
Example #10
def _decision_tree_regression_train(
        table,
        feature_cols,
        label_col,  # fig_size=np.array([6.4, 4.8]), 
        criterion='mse',
        splitter='best',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=None,
        random_state=None,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        presort=False,
        sample_weight=None,
        check_input=True,
        X_idx_sorted=None):

    param_validation_check = [
        greater_than_or_equal_to(min_samples_split, 2, 'min_samples_split'),
        greater_than_or_equal_to(min_samples_leaf, 1, 'min_samples_leaf'),
        greater_than_or_equal_to(min_weight_fraction_leaf, 0.0,
                                 'min_weight_fraction_leaf')
    ]
    if max_depth is not None:
        param_validation_check.append(
            greater_than_or_equal_to(max_depth, 1, 'max_depth'))

    validate(*param_validation_check)

    regressor = DecisionTreeRegressor(criterion, splitter, max_depth,
                                      min_samples_split, min_samples_leaf,
                                      min_weight_fraction_leaf, max_features,
                                      random_state, max_leaf_nodes,
                                      min_impurity_decrease,
                                      min_impurity_split, presort)
    regressor.fit(table[feature_cols], table[label_col], sample_weight,
                  check_input, X_idx_sorted)

    try:
        from sklearn.externals.six import StringIO
        from sklearn.tree import export_graphviz
        import pydotplus
        dot_data = StringIO()
        export_graphviz(regressor,
                        out_file=dot_data,
                        feature_names=feature_cols,
                        filled=True,
                        rounded=True,
                        special_characters=True)
        graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

        from brightics.common.repr import png2MD
        fig_tree = png2MD(graph.create_png())
    except Exception:
        fig_tree = "Graphviz is needed to draw a Decision Tree graph. Please download it from http://graphviz.org/download/ and install it on your computer."

    # json
    model = _model_dict('decision_tree_regression_model')
    model['feature_cols'] = feature_cols
    model['label_col'] = label_col
    feature_importance = regressor.feature_importances_
    model['feature_importance'] = feature_importance
    model['max_features'] = regressor.max_features_
    model['n_features'] = regressor.n_features_
    model['n_outputs'] = regressor.n_outputs_
    model['tree'] = regressor.tree_
    get_param = regressor.get_params()
    model['parameters'] = get_param
    model['regressor'] = regressor

    # report

    indices = np.argsort(feature_importance)
    sorted_feature_cols = np.array(feature_cols)[indices]

    plt.title('Feature Importances')
    plt.barh(range(len(indices)),
             feature_importance[indices],
             color='b',
             align='center')
    for i, v in enumerate(feature_importance[indices]):
        plt.text(v,
                 i,
                 " {:.2f}".format(v),
                 color='b',
                 va='center',
                 fontweight='bold')
    plt.yticks(range(len(indices)), sorted_feature_cols)
    plt.xlabel('Relative Importance')
    plt.tight_layout()
    fig_feature_importances = plt2MD(plt)
    plt.clf()

    params = dict2MD(get_param)
    feature_importance_df = pd.DataFrame(data=feature_importance,
                                         index=feature_cols).T

    # Add tree plot

    rb = BrtcReprBuilder()
    rb.addMD(
        strip_margin("""
    | ## Decision Tree Regression Train Result
    | ### Decision Tree
    | {fig_tree}
    |
    | ### Feature Importance
    | {fig_feature_importances}
    |
    | ### Parameters
    | {list_parameters}
    |
    """.format(fig_tree=fig_tree,
               fig_feature_importances=fig_feature_importances,
               list_parameters=params)))
    model['_repr_brtc_'] = rb.get()

    return {'model': model}
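The except branch above falls back to a message pointing at Graphviz; scikit-learn 0.21+ can also draw a tree without Graphviz via sklearn.tree.plot_tree. A minimal sketch of that alternative, with an illustrative dataset:
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor, plot_tree

data = load_diabetes()
reg = DecisionTreeRegressor(max_depth=3).fit(data.data, data.target)
plt.figure(figsize=(12, 6))
plot_tree(reg, feature_names=data.feature_names, filled=True, rounded=True)
plt.tight_layout()
plt.savefig("tree.png")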
Example #11
we must rescale the values,
because the difference from one number to the next is very large!
"""

# use standardization to put the values on a comparable scale
scaler = StandardScaler()

# fit the scaler to the values and apply the transformation
previsores = scaler.fit_transform(previsores)

# split the dataset into test and training sets
previsores_treinamento, previsores_teste, attr_classe_treinamento, attr_classe_teste = train_test_split(
    previsores, attr_classe, test_size=0.3, random_state=0)
clf = DecisionTreeClassifier(criterion="entropy")
clf.fit(previsores_treinamento, attr_classe_treinamento)
resultado = clf.predict(previsores_teste)
acuracia = accuracy_score(attr_classe_teste, resultado) * 100
matriz = confusion_matrix(attr_classe_teste, resultado)

### Visualize the decision tree

export.export_graphviz(
    clf,
    out_file="arvore_decisao.dot",
    feature_names=["income", "age", "loan"],
    class_names=["0", "1"],
    filled=True,
    leaves_parallel=True
)  # the .dot extension is required; node names; class names *

# * the class names must be given in the order in which they first appear in the dataset.
print("Veri seti Eğitiliyor.......\n")
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

feat_importance = clf.tree_.compute_feature_importances(normalize=False)
print("feature importance = " + str(feat_importance))

from sklearn.tree import export_graphviz
from sklearn.feature_selection import mutual_info_classif
from io import StringIO  # Python 3 replacement for the old `from StringIO import StringIO`

out = StringIO()
export_graphviz(clf, out_file=out)  # write the dot source into the buffer

# In[14]:

print("Eski Test Veriseti Accuracy: "), accuracy_score(y_test, y_pred)

# In[15]:

print("Veri seti Eğitiliyor.......\n")
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

feat_importance = clf.tree_.compute_feature_importances(normalize=False)
print("feature importance = " + str(feat_importance))
Example #13
precision = accuracy_score(
    classLabel_test,
    predictions)  # compares the predictions with the original class labels
matrix = confusion_matrix(
    classLabel_test, predictions
)  # confusion matrix -> main diagonal: correct classifications; off-diagonal: errors, e.g. index [0, 1] -> true class 0 classified as 1

#"age , workclass , final-weight , education , education-num , marital-status , occupation , relationship , race , sex , capital-gain , capital-loos , hour-per-week , native-country , income"
#pred1=classificator.predict([[40,4,200000,12,14,7,4,1,2,1,1000,0,50,39]]) #returns class
#"40,Private,200000,Masters,14,divorced,sales,husband,black,male,1000,0,50,united-states"

export.export_graphviz(
    classificator,  # generate a tree for later visualization; doesn't work with OHE
    out_file='censusTree.dot',
    feature_names=[
        'age', 'workclass', 'final-weight', 'education', 'education-num',
        'marital-status', 'occupation', 'relationship', 'race', 'sex',
        'capital-gain', 'capital-loos', 'hour-per-week', 'native-country'
    ],
    class_names='classLabel',
    filled=True,
    leaves_parallel=True)

#PRECISION RESULTS :
#Entropy/Gain
#With all pre processing = 81.04 %
#Without OHE = 81.28%
#Without scaler = 81.02%
#Gini
#With all pre processing = 81.14 %
#Without OHE = 80.06%
#Without scaler = 81.11%
Example #14
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])
#print(previsores)

# 3) Train the model
classificador = DecisionTreeClassifier(criterion='entropy')
classificador.fit(previsores, classe)
print(classificador.feature_importances_)  # Show the importance of each attribute
print(classificador.score(previsores, classe))  # Check how good the model is; the closer to 1 (100%), the better.

# 4) Export the decision tree graph to be read later by the graphviz tool
export.export_graphviz(
    classificador,
    out_file='arvore.dot',  # .dot is the extension used by the graphviz tool
    feature_names=['historia', 'divida', 'garantias', 'renda'],
    class_names=classificador.classes_,
    filled=True,
    leaves_parallel=True)

# 5) Classify with the already-trained model, using the following examples:
'''
good history, high debt, no collateral, income > 35
bad history, high debt, adequate collateral, income < 15
'''

resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(resultado)
Example #15
           linewidths=.5,
           fmt="d")
plt.ylabel("Valores reais das notas médias")
plt.xlabel("Valores previstos das notas médias")
plt.savefig('cmadd.png')
plt.show()

# analysis of the number of reviews grouped by rating
y = df.groupby(['Reviews'])['Rating'].mean()
y.sort_values(ascending=False, inplace=True)

# graph generator for the decision tree
export.export_graphviz(classificador,
                       out_file='arvore_rating_3n.dot',
                       feature_names=[
                           'Reviews', 'Size', 'Installs', 'Content Rating',
                           'Genre', 'Category'
                       ],
                       class_names=['1', '2', '3', '4', '5'],
                       filled=True,
                       leaves_parallel=True)

# A first run with an unbounded number of nodes reached a precision of 0.6829643296432965
# The data were overfit, given the number of nodes
# Decision: reduce the number of nodes to n=3

# precision = 0.7404674046740467, an improvement of 0.057
# conclusion:
# in a 3-node decision tree, the number of reviews is highly relevant for determining the rating
# note: the second most relevant attribute, for n=10, turned out to be 'Installs', improving precision by 0.007% (0.7478474784747847)
Example #16
# Preprocess the predictors, which are categorical, because
# scikit-learn decision trees do not handle categorical data directly
labelencoder = LabelEncoder()
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelencoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])

# Apply the decision tree algorithm
from sklearn.tree import DecisionTreeClassifier, export
classificador = DecisionTreeClassifier(criterion='entropy')
classificador.fit(previsores, classe)

print(classificador.feature_importances_)

export.export_graphviz(
    classificador,
    out_file=
    r'C:\Users\allan\Documents\Python_Machine_Learning_Jones_Granatyr\Decision_Tree\arvore.dot',
    feature_names=['historia', 'divida', 'garantias', 'renda'],
    class_names=['alto', 'moderado', 'baixo'],
    filled=True,
    leaves_parallel=True)

# Classify samples not included in the predictors
# good history, high debt, no collateral, income > 15
# bad history, high debt, adequate collateral, income < 15
resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])

print(classificador.classes_)
Example #17
classe = base.iloc[:, 4].values

from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
previsores[:, 0] = labelencoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelencoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelencoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelencoder.fit_transform(previsores[:, 3])

from sklearn.tree import DecisionTreeClassifier, export

classificador = DecisionTreeClassifier(criterion='entropy')
classificador.fit(previsores, classe)  # Build the decision tree
print(classificador.feature_importances_)  # Show the importance of each attribute

export.export_graphviz(
    classificador,
    out_file='arvore.dot',
    feature_names=['Historia', 'Divida', 'Garantias', 'Renda'],
    class_names=['Alto', 'Moderado', 'Baixo'],
    filled=True,
    leaves_parallel=True)
# good history, high debt, no collateral, income > 35
# bad history, high debt, adequate collateral, income < 15

resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(classificador.classes_)
# DecisionTreeClassifier has no class_count_ or class_prior_ attributes (those
# belong to naive Bayes estimators); the original calls would raise AttributeError:
# print(classificador.class_count_)
# print(classificador.class_prior_)
Example #18
entradas_treinamento, entradas_teste, classe_treinamento, classe_teste = train_test_split(
    entradas, classe, test_size=0.30, random_state=0)

# Instantiate the decision tree classifier
# It will build a decision tree according to the entropy criterion

classificador = DecisionTreeClassifier(criterion='entropy')
classificador.fit(entradas_treinamento, classe_treinamento)

#print(classificador.feature_importances_)

# Visualize the tree
export.export_graphviz(classificador,
                       out_file='arvore.dot',
                       feature_names=[
                           'Alternativa', 'Bar', 'Sex/Sab', 'Faminto',
                           'Clientes', 'Preço', 'Chovendo', 'Reserva', 'Tipo',
                           'Espera estimada'
                       ],
                       class_names=['Sim', 'Não'],
                       filled=True,
                       leaves_parallel=True)

# Use the test set to check the result
resultado_teste = classificador.predict(entradas_teste)

# Check how many test predictions were correct
precisao = accuracy_score(classe_teste, resultado_teste)

print(precisao)
Example #19
previsores = base.iloc[:, 0:4].values
classe = base.iloc[:, 4].values

from sklearn.preprocessing import LabelEncoder

labelEncoder = LabelEncoder()

base.columns

previsores[:, 0] = labelEncoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelEncoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelEncoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelEncoder.fit_transform(previsores[:, 3])

from sklearn.tree import DecisionTreeClassifier, export

classificador = DecisionTreeClassifier(criterion="entropy")
classificador.fit(previsores, classe)
print(classificador.feature_importances_)

export.export_graphviz(
    classificador,
    out_file='arvore.dot',
    feature_names=['historia', 'divida', 'garantias', 'renda'],
    class_names=classificador.classes_,
    filled=True,
    leaves_parallel=True)

resultado = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])
print(classificador.classes_)
Example #20
def _decision_tree_classification_train(
        table,
        feature_cols,
        label_col,  # fig_size=np.array([6.4, 4.8]), 
        criterion='gini',
        splitter='best',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=None,
        random_state=None,
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        class_weight=None,
        presort=False,
        sample_weight=None,
        check_input=True,
        X_idx_sorted=None):
    classifier = DecisionTreeClassifier(
        criterion, splitter, max_depth, min_samples_split, min_samples_leaf,
        min_weight_fraction_leaf, max_features, random_state, max_leaf_nodes,
        min_impurity_decrease, min_impurity_split, class_weight, presort)
    classifier.fit(table[feature_cols], table[label_col], sample_weight,
                   check_input, X_idx_sorted)

    from sklearn.externals.six import StringIO
    from sklearn.tree import export_graphviz
    import pydotplus
    dot_data = StringIO()
    export_graphviz(classifier,
                    out_file=dot_data,
                    feature_names=feature_cols,
                    class_names=table[label_col].astype('str').unique(),
                    filled=True,
                    rounded=True,
                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

    from brightics.common.report import png2MD
    fig_tree = png2MD(graph.create_png())

    # json
    model = _model_dict('decision_tree_classification_model')
    model['feature_cols'] = feature_cols
    model['label_col'] = label_col
    model['classes'] = classifier.classes_
    feature_importance = classifier.feature_importances_
    model['feature_importance'] = feature_importance
    model['max_features'] = classifier.max_features_
    model['n_classes'] = classifier.n_classes_
    model['n_features'] = classifier.n_features_
    model['n_outputs'] = classifier.n_outputs_
    model['tree'] = classifier.tree_
    get_param = classifier.get_params()
    model['parameters'] = get_param
    model['classifier'] = classifier

    # report

    indices = np.argsort(feature_importance)
    sorted_feature_cols = np.array(feature_cols)[indices]

    plt.title('Feature Importances')
    plt.barh(range(len(indices)),
             feature_importance[indices],
             color='b',
             align='center')
    for i, v in enumerate(feature_importance[indices]):
        plt.text(v,
                 i,
                 " {:.2f}".format(v),
                 color='b',
                 va='center',
                 fontweight='bold')
    plt.yticks(range(len(indices)), sorted_feature_cols)
    plt.xlabel('Relative Importance')
    plt.xlim(0, 1.1)
    plt.tight_layout()
    fig_feature_importances = plt2MD(plt)
    plt.clf()

    params = dict2MD(get_param)
    feature_importance_df = pd.DataFrame(data=feature_importance,
                                         index=feature_cols).T

    # Add tree plot

    rb = ReportBuilder()
    rb.addMD(
        strip_margin("""
    | ## Decision Tree Classification Train Result
    | ### Decision Tree
    | {fig_tree}
    |
    | ### Feature Importance
    | {fig_feature_importances}
    |
    | ### Parameters
    | {list_parameters}
    |
    """.format(fig_tree=fig_tree,
               fig_feature_importances=fig_feature_importances,
               list_parameters=params)))
    model['report'] = rb.get()

    return {'model': model}
Example #21
sns.barplot("Features", "Importance", data=importances_gb.sort_values(by='Importance', ascending=False), color="darkorange", alpha=0.6, ax=axs[1,1])
axs[1,1].set_xlabel("Features")
axs[1,1].set_title("Gradient Boosting Importances")

plt.tight_layout()
plt.savefig('Importances.png')
plt.show()

#Graphviz for tree visualization
dtree = DecisionTreeClassifier() #no parameters
dtree = dtree.fit(features_train, target_train)

export.export_graphviz(dtree,
                       out_file = 'dtree_maxdepth.dot',
                       feature_names = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Has_Cabin', 'FamilySize', 'Title'],
                       class_names = True,
                       filled = True,
                       rounded = True,
                       leaves_parallel=True)

#Parameterized tree
pdtree = DecisionTreeClassifier(
        random_state = 1,
        max_depth = 7, 
        min_samples_split = 2,
        criterion='entropy')
pdtree = pdtree.fit(features_train, target_train)

export.export_graphviz(pdtree,
                       out_file = 'dtree_7depth.dot',
                       feature_names = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Has_Cabin', 'FamilySize', 'Title'],
Example #22
sn.set(font_scale=1.4)  # for label size
sn.heatmap(df_cm,
           xticklabels=['Negativo', 'Neutro', 'Positivo'],
           yticklabels=['Negativo', 'Neutro', 'Positivo'],
           annot=True,
           annot_kws={"size": 14},
           linewidths=.5,
           fmt="d")  # font size
plt.ylabel("Classificação real dos sentimentos")
plt.xlabel("Classificação prevista dos sentimentos")
plt.savefig('cmaddr.png')
plt.show()

# graph generator for the decision tree
export.export_graphviz(
    classificador,
    out_file='arvore_reviews3n.dot',
    feature_names=['Sentiment_Subjectivity', 'Sentiment_Polarity'],
    class_names=['Negative', 'Neutral', 'Positive'],
    filled=True,
    leaves_parallel=True)

# A first run with an unbounded number of nodes reached a precision of 0.9986641731231632
# The data were overfit because of the number of nodes
# Decision: reduce the number of nodes to k=3

# precision = 0.998797755810847
# conclusion:
# with a 3-node decision tree, precision improved by 0.001%, which shows that the polarity
# attribute is a decisive factor for predicting sentiment
Example #23
# In[12]:

classificador.fit(previsores, classe)

# In[15]:

# Check the importance of each attribute
print(classificador.feature_importances_)

# In[17]:

# Create the file for graphical visualization of the decision tree
export.export_graphviz(
    classificador,
    out_file='arvore.dot',
    feature_names=['historia', 'divida', 'garantias', 'renda'],
    class_names=['alto', 'moderado', 'baixo'],
    filled=True,
    leaves_parallel=True)

# In[18]:

resultados = classificador.predict([[0, 0, 1, 2], [3, 0, 0, 0]])

# In[19]:

resultados

# In[22]:

print(classificador.classes_)
Example #24
    predictors)  # fits and transforms the predictors, returning the updated df

from sklearn.model_selection import train_test_split
predictors_training, predictors_test, classLabel_training, classLabel_test = train_test_split(
    predictors, classLabel, test_size=0.25, random_state=0)

###################################### Algorithm ######################################
from sklearn.tree import DecisionTreeClassifier, export
classificator = DecisionTreeClassifier(criterion='entropy')
classificator.fit(predictors, classLabel)  # note: fits on the full set, not predictors_training
print(classificator.feature_importances_)

export.export_graphviz(
    classificator,  # generate a tree for later visualization
    out_file='creditdataTree.dot',
    feature_names=['income', 'age', 'loan'],
    class_names='classLabel',
    filled=True,
    leaves_parallel=True)

#                             Income,   Age,    Loan
pred1 = classificator.predict([[50000.00, 60.00, 10000.00]])  #returns class=0

#Now for whole predictors_test
predictions = classificator.predict(predictors_test)

from sklearn.metrics import confusion_matrix, accuracy_score
precision = accuracy_score(
    classLabel_test,
    predictions)  # compares the predictions with the original class labels
matrix = confusion_matrix(