# ---------------------------------------------------------------------------
# Feature-importance plots for the previously fitted models.
#
# Loads the clustered PL table, removes the target column 'PanneDelai_1' so
# that only predictor names remain, and plots the top 40 feature importances
# of each pickled search object (its ``best_estimator_`` is used).
# ---------------------------------------------------------------------------
analyzer = Analyzer(datestr=date_str)
loader = Loader(date_str)

# Disabled variant: the same plots for the "Armoire" data set.
# ArmInt_cluster = loader.load_excel(filename='ArmInt_cluster',foldername='Cluster')
# ArmInt_cluster.drop(['PanneDelai_1'], axis=1,inplace=True)
# feature_names = np.array(list(ArmInt_cluster.columns))
#
# clf = loader.load_pickle('Randomforest_Armoire')
# analyzer.plot_feature_importance(importances=clf.best_estimator_.feature_importances_,featurenames=feature_names,title='Randomforest_featureimportance_Armoire',top_n=40)
#
# clf = loader.load_pickle('GradientBoosting_Armoire')
# analyzer.plot_feature_importance(importances=clf.best_estimator_.feature_importances_,featurenames=feature_names,title='GradientBoosting_featureimportance_Armoire',top_n=40)
#

PL_cluster = loader.load_excel(filename='PL_cluster', foldername='Cluster')
# 'PanneDelai_1' is the column used as the target in the training code of
# this file, so it is excluded from the feature names.
PL_cluster.drop(['PanneDelai_1'], axis=1, inplace=True)
feature_names = np.array(list(PL_cluster.columns))
# NOTE(review): feature_names must line up one-to-one with the columns the
# models were fitted on. The training code in this file copies extra columns
# from 'PLInt_encode_num' into the table before fitting -- confirm that the
# 'PL_cluster' sheet loaded here already contains them, in the same order.

# Same load-and-plot sequence for both pickled models; after the loop ``clf``
# refers to the GradientBoosting object, as it did when the calls were
# written out one after the other.
for model_name in ('Randomforest', 'GradientBoosting'):
    clf = loader.load_pickle(model_name + '_PL')
    analyzer.plot_feature_importance(
        importances=clf.best_estimator_.feature_importances_,
        featurenames=feature_names,
        title=model_name + '_featureimportance_PL',
        top_n=40,
    )
# Disabled: correlation heat map of the PL cluster table.
# cor_PL = PL_cluster.corr()
# cor_plot_PL = sns.heatmap(cor_PL, square = True).get_figure()
# cor_plot_PL.savefig(os.path.join(saver.datasavedir,'img','clustering','correlation_PL.jpg'))

# ---------------------------------------------------------------------------
# random forest
#
# Builds the design matrix X and target y from the clustered PL table and
# hands them to the modeler's RandomForest and GradientBoosting trainers.
# ---------------------------------------------------------------------------

# Disabled variant: the same training run for the "Armoire" data set.
# ArmInt_cluster = loader.load_excel(filename='ArmInt_cluster',foldername='Cluster')
# ArmInt_num = loader.load_excel(filename='ArmInt_encode_num',foldername='Encode/Armoire')
# ArmInt_num.reset_index(drop=True,inplace=True)
# ArmInt_cluster[['PanneDelai_1','DelaiInt_1','PanneDelai_2','DelaiInt_2']] = ArmInt_num[['PanneDelai_1','DelaiInt_1','PanneDelai_2','DelaiInt_2']]
# # print(ArmInt_cluster[['PanneDelai_1','DelaiInt_1','PanneDelai_2','DelaiInt_2']].head())
# y = pd.DataFrame(ArmInt_cluster['PanneDelai_1']).values
# ArmInt_cluster.drop(['PanneDelai_1'], axis=1,inplace=True)
# X = ArmInt_cluster.values
#
# modeler.train_RandomForest(X=X,y=y,title='Armoire')
# modeler.train_GradientBoosting(X=X,y=y,title='Armoire')

PL_cluster = loader.load_excel(filename='PL_cluster', foldername='Cluster')
PL_num = loader.load_excel(filename='PLInt_encode_num', foldername='Encode/PL')

# Give PL_num a fresh 0..n-1 index before the column assignment below, which
# pandas aligns on index labels.
# NOTE(review): this presumes PL_cluster also carries a default 0..n-1 index
# and the same row order as PL_num -- confirm against how both sheets are saved.
PL_num.reset_index(drop=True, inplace=True)

# Columns copied from the numerically encoded table into the cluster table.
delay_columns = ['PanneDelai_1', 'DelaiInt_1', 'PanneDelai_2', 'DelaiInt_2']
PL_cluster[delay_columns] = PL_num[delay_columns]
# print(ArmInt_cluster[['PanneDelai_1','DelaiInt_1','PanneDelai_2','DelaiInt_2']].head())

# Wrapping the Series in a DataFrame makes ``.values`` a 2-D (n, 1) array.
# NOTE(review): scikit-learn estimators expect a 1-D y and warn on a column
# vector; if the modeler passes y straight to ``fit``, consider ``.ravel()``.
y = pd.DataFrame(PL_cluster['PanneDelai_1']).values

# Remove the target so that X holds predictors only.
PL_cluster.drop(['PanneDelai_1'], axis=1, inplace=True)
X = PL_cluster.values

modeler.train_RandomForest(X=X, y=y, title='PL')
modeler.train_GradientBoosting(X=X, y=y, title='PL')