from sklearn.svm import SVC
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.grid_search import GridSearchCV
import sys
import numpy as np
import pandas as pd

sys.path.append('/home/peng/git/Machine_learning_for_reliability_analysis/Preprocess')
import Preprocessdata
import RFclass

## default setting
data_path = '../Data/'
p = Preprocessdata.standardprocess()


## class
class MyClass(object):
    """Load a training CSV and report its size and feature names."""

    def __init__(self, Training_data):
        df = pd.read_csv(data_path + Training_data, header=0)
        self.train, self.trainlabel, self.test, self.testlabel = p.noscale(df, 1.0)
        print('This dataset contains %r samples with %r features'
              % (np.shape(self.train)[0], np.shape(self.train)[1]))
        print('These features are:')
        for i in range(np.shape(self.train)[1]):
            print('Feature %r : %r' % (i + 1, list(df.columns.values)[i]))
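
if __name__ == '__main__':
    # Minimal usage sketch. 'Training_set.csv' is a hypothetical file name, not
    # one shipped with this repository; any CSV that
    # Preprocessdata.standardprocess().noscale() can split would do.
    dataset = MyClass('Training_set.csv')
    print(np.shape(dataset.train), np.shape(dataset.trainlabel))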
from sklearn.svm import SVC
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
from evolutionary_search import EvolutionaryAlgorithmSearchCV
import random
import timeit
import numpy as np
import pandas as pd

import Preprocessdata

start = timeit.default_timer()

df = pd.read_csv('Source_Data.csv', header=0)
p = Preprocessdata.standardprocess()
train, trainlabel, test, testlabel = p.scaledivd(df, 1.0)
print(np.shape(train))

# Hyper-parameter search ranges for the SVM: 2^-10 ... 2^10.
C_range = np.logspace(-10, 10, num=21, base=2, endpoint=True)
gamma_range = np.logspace(-10, 10, num=21, base=2, endpoint=True)

#######################################################################
paramgrid = {"kernel": ["poly"],
             "C": C_range,
             "gamma": gamma_range,
             "degree": [3]}
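
# A sketch of how the grid above could be handed to the evolutionary search.
# The constructor arguments follow the sklearn-deap README (params, scoring,
# cv, population_size, gene_mutation_prob, gene_crossover_prob,
# tournament_size, generations_number); the concrete values here are
# illustrative assumptions, not tuned settings from this repository.
search = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
                                       params=paramgrid,
                                       scoring='accuracy',
                                       cv=StratifiedKFold(trainlabel, n_folds=10),
                                       verbose=1,
                                       population_size=50,
                                       gene_mutation_prob=0.10,
                                       gene_crossover_prob=0.5,
                                       tournament_size=3,
                                       generations_number=10)
search.fit(train, trainlabel)
print(search.best_params_, search.best_score_)

stop = timeit.default_timer()
print("The search took %r min" % ((stop - start) / 60))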
import timeit
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame
from sklearn import metrics

import Preprocessdata
import RFclass


def main():
    start = timeit.default_timer()
    df = pd.read_csv('/home/peng/new160half.csv', header=0)

    # Optional shuffling of the 160 samples before splitting:
    # df['random_number'] = np.random.random(size=160)
    # df_sort = df.sort(columns='random_number')
    # df_sort.drop(['random_number'], inplace=True, axis=1)
    # df_sort.to_csv('new_random_160.csv', header=0)

    p = Preprocessdata.standardprocess()
    # df_2 = pd.read_csv('/home/peng/git/Machine_learning_for_reliability_analysis/Test_1/score_long_2features_rf.csv', header=0)

    # Split without feature scaling (0.9 is presumably the training fraction).
    train, trainlabel, test, testlabel = p.noscale(df, 0.9)
    # train, trainlabel = p.noaction(df)
    # df_2 = pd.read_csv('/home/peng/git/Machine_learning_for_reliability_analysis/score50-500_2.csv', header=0)

    ff = RFclass.training()
    tt = RFclass.test()
    feature_range = np.arange(12, 13, 1)
    tree_range = np.arange(700, 701, 1)

    ##################### sensitivity for 10-fold CV ##############
    # score = []
    # for i in range(100):
    #     score.append(ff.trainman_sensitivity_CV('adb', train, trainlabel, tree_range, feature_range))
    # print(score)
    # df_raw_times = pd.DataFrame({'times': np.arange(1, 101, 1), 'scores': score})
    # df_raw_times = ff.str_float(df_raw_times['scores'])
    # df_acc_times = ff.accuracy(df_raw_times)
    # df_acc_times.to_csv('adb_acc_10cv_f12_t700_100times.csv', header=True)

    """Just separate"""
    # forest = ff.trainforest('ext', train, trainlabel, 1900, 9)
    # y_pred = forest.predict(test)
    # print(metrics.precision_score(testlabel, y_pred))
    # cm = metrics.confusion_matrix(testlabel, y_pred)
    # tt.plot_confusion_matrix(cm)

    """the CART single tree"""
    forest = ff.trainforest('cart', train, trainlabel, 20, 1)
    y_pred = forest.predict(test)
    print(metrics.accuracy_score(testlabel, y_pred))
    print(metrics.precision_score(testlabel, y_pred))
    cm = metrics.confusion_matrix(testlabel, y_pred)
    tt.plot_confusion_matrix(cm)

    # Repeat the AdaBoost run 100 times on the held-out split and save the accuracies:
    # score = []
    # for i in range(100):
    #     forest = ff.trainforest('adb', train, trainlabel, 1450, 11)
    #     y_pred = forest.predict(test)
    #     score.append(metrics.accuracy_score(testlabel, y_pred))
    # df = pd.DataFrame({'times': np.arange(1, 101, 1), 'acc_score': score})
    # df.to_csv('adb_acc_63_f12_t1450_100times.csv', header=True)
    # print(df.describe())

    # Re-plot a previously saved accuracy curve:
    # df = pd.read_csv('/home/peng/git/Machine_learning_for_reliability_analysis/Test_1/adb_acc_63_f12_t1450_100times.csv', header=0)
    # plt.plot(df['times'], df['acc_score'])
    # plt.xticks(np.arange(1, 101, 1), np.arange(1, 101, 1))
    # plt.xlabel()
    # plt.show()

    ############################################################################
    # df_66_33 = {'tree_range': tree_range}
    # df_all = DataFrame(df_66_33)
    # scores = ff.trainonlyfeat('bag', train, trainlabel, tree_range, feature_range)
    # scores.to_csv('bag_100_4000_10times.csv', header=True)

    # data = ff.train_repeat_forest_metrics('bag', train, trainlabel, test, testlabel, tree_range, feature_range, 10)
    # data.to_csv('nnnnnn_crazy66_33_100_4000.csv', header=True)
    # data = ff.train_repeat_forest_metrics('adb', train, trainlabel, test, testlabel, tree_range, feature_range, 10)
    # data.to_csv('nnnnnnnnnn_crazy66_33_100_4000.csv', header=True)
    # data = ff.train_repeat_forest_metrics('gbt', train, trainlabel, test, testlabel, tree_range, feature_range, 10)
    # data.to_csv('gbt_crazy66_33_100_4000.csv', header=True)

    # data = ff.trainmanCV('rf', train, trainlabel, tree_range, feature_range)
    # data.to_csv('rf_crazy100_4000.csv', header=True)
    # data = ff.trainmanCV('ext', train, trainlabel, tree_range, feature_range)
    # data.to_csv('ext_crazy100_4000.csv', header=True)
    # data = ff.trainmanCV('bag', train, trainlabel, tree_range, feature_range)
    # data.to_csv('bag_crazy100_4000n.csv', header=True)
    # data = ff.trainmanCV('adb', train, trainlabel, tree_range, feature_range)
    # data.to_csv('adb_crazy100_4000n.csv', header=True)
    # data = ff.trainmanCV('gbt', train, trainlabel, tree_range, feature_range)
    # data.to_csv('gbt_crazy100_4000n.csv', header=True)
    # scores.to_csv('rf_1_5_1_feature4.csv', header=False)
    # print(scores)

    stop = timeit.default_timer()
    print("The run took %r min" % ((stop - start) / 60))
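
# The original call site for main() is not shown in this snippet; a standard
# entry-point guard (added here as an assumption) makes the script runnable.
if __name__ == '__main__':
    main()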