if __name__ == '__main__': trainsize = 5000 testsize = 5000 numruns = 3 classalgs = { #'Random': algs.Classifier(), 'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}), 'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}), 'Linear Regression': algs.LinearRegressionClass(), 'Logistic Regression Reg': algs.LogitReg({ 'regularizer': 'l2', 'lamb': 0.001, 'stepsize': 0.001 }), 'Logistic Regression': algs.LogitReg({ 'lamb': 0.001, 'stepsize': 0.001 }), 'kernel Logistic Regression': algs.KernelLogitReg({'k': 30}), 'Hamming kernel Logistic Regression': algs.KernelLogitReg({ 'kernel': 'hamming', 'k': 20 }), 'Neural Network':
if __name__ == '__main__': trainsize = 5000 testsize = 5000 numruns = 10 classalgs = { #'Random': algs.Classifier(), #'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}) #'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}) #'Linear Regression': algs.LinearRegressionClass(), #'Logistic Regression': algs.LogitReg() #'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}) #'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}) 'ElasticNet Logistic Regression': algs.LogitReg({'regularizer': 'ElasticNet'}) #'Logistic Alternative': algs.LogitRegAlternative() #'Neural Network': algs.NeuralNet({'epochs': 1}) } numalgs = len(classalgs) parameters = ( { 'regwgt': 0.0, 'nh': 4 }, { 'regwgt': 0.01, 'nh': 8 }, {
for i in range(len(ytest)): if ytest[i] == predictions[i]: correct += 1 return (correct / float(len(ytest))) * 100.0 def loadsusy(): dataset = np.genfromtxt( 'C:\\Users\\Nandini\\Documents\\Textbooks\Project BD\\Classifiers-implemented-master\\output2.csv', delimiter=',') trainset, testset = splitdataset(dataset) return trainset, testset if __name__ == '__main__': trainset, testset = loadsusy() print('Running on train={0} and test={1} samples').format( trainset[0].shape[0], testset[0].shape[0]) classalgs = { 'Logistic Regression': algs.LogitReg(), } for learnername, learner in classalgs.iteritems(): print 'Running learner = ' + learnername # Train model dividedDS = {} dividedDS = learner.learn(trainset[0], trainset[1]) predictions = learner.predict(testset[0]) accuracy = getaccuracy(testset[1], predictions) print 'Accuracy for ' + learnername + ': ' + str(accuracy)
dataset = np.genfromtxt('datasets/numericsequence.csv', delimiter=',') # dataset = dtl.load_occupancy_dataset() # dtl.load_occupancy_dataset(trainsize, testsize) np.random.shuffle(dataset) errors = {} # accuracies = [] classalgs = {} numparams = 0 parameters = {} for i in range(5): smallDataSet = dataset[i * 1300:(i + 1) * 1300] classalgs = { 'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}), 'Logistic Regression': algs.LogitReg(), 'Neural Network': algs.NeuralNet({ 'epochs': 100, 'stepsize': 0.01, 'nh': 8, 'ni': 19 }) # 'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}), # 'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}), } numalgs = len(classalgs) parameters = ( # {'regwgt': 0.0, 'nh': 4}, { 'regwgt': 0.01,
def classify():
    """Train and evaluate the configured learners, then plot accuracy curves.

    The ``ia_success`` labels and the feature columns are fetched through
    ``cc``, converted to float arrays, shuffled with a fixed seed and split
    50% / 25% / 25% into train / validation / test partitions.  Every learner
    in ``classalgs`` is reset, trained and scored once per parameter setting
    and per run; test accuracies are stored in a local ``accuracy`` table
    holding one ``(numparams, numruns)`` array per learner.

    When plotting is enabled, the curves are read from ``learning_acc.pkl``
    in the working directory and displayed via ``plt``.

    Raises:
        ValueError: if the feature matrix and the label vector do not have
            the same number of rows.
    """
    # Switches for the two phases of the experiment.
    run = True
    plot = True

    numruns = 1
    dataset_file = "data.csv"
    num_steps = 1

    classalgs = {'Logistic Regression': algs.LogitReg()}
    parameters = (
        {
            'regularizer': 'None',
            'stepsize': 0.001,
            'num_steps': num_steps,
            'batch_size': 2,
        },
    )
    numparams = len(parameters)

    accuracy = {
        learnername: np.zeros((numparams, numruns))
        for learnername in classalgs
    }

    # NOTE(review): the result of readcsv is never used in this function; the
    # call is kept in case it has side effects that ``cc`` relies on - confirm.
    dp.readcsv(dataset_file)

    # The builtin ``float`` replaces the ``np.float`` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24 (same float64 dtype).
    Y = np.array(cc.getData("ia_success")).astype(float)

    feature_columns = [
        'max_size', 'first_size', 'first_status_held', 'sec_to_uc',
        'aircraft_n_Fixed', 'aircraft_n_Rotary', 'aircraft_n_total',
        'aircraft_hr_Fixed', 'aircraft_hr_Rotary', 'aircraft_hr_total',
        'n_firefighters', 'n_non_firefighters',
        'hr_firefighters', 'hr_non_firefighters',
        'drop_amount_retardant', 'drop_amount_water', 'drop_amount_total',
        'n_fire_past_1', 'n_fire_past_7', 'n_fire_past_30',
        'response_time', 'general_cause', 'year', 'month',
        'latitude', 'longitude', 'assessment_size',
        'fire_spread_rate', 'fire_position_on_slope',
        'temperature', 'relative_humidity',
        'wind_direction', 'wind_speed', 'weather_conditions_over_fire',
        'equipment_Transportation', 'equipment_Water_Delivery',
        'equipment_Sustained_Action', 'equipment_Fire_Guard_Building',
        'equipment_Crew_Gear', 'equipment_Base_Camp',
        'equipment_WaterTruck_Transportation',
        'wstation_dry_bulb_temperature', 'wstation_relative_humidity',
        'wstation_wind_speed_kmh', 'wstation_wind_direction',
        'wstation_precipitation', 'wstation_fine_fuel_moisture_code',
        'wstation_duff_moisture_code', 'wstation_drought_code',
        'wstation_build_up_index', 'wstation_initial_spread_index',
        'wstation_fire_weather_index', 'wstation_daily_severity_rating',
        'fuelgrid_C', 'fuelgrid_D', 'fuelgrid_M', 'fuelgrid_Nonfuel',
        'fuelgrid_O', 'fuelgrid_S', 'fuelgrid_Unclassified',
        'fuelgrid_Water',
        'fuel_type2', 'grouped_fuel_type2',
        'fbp_CFB', 'fbp_CFC', 'fbp_FD', 'fbp_HFI', 'fbp_RAZ', 'fbp_ROS',
        'fbp_SFC', 'fbp_TFC', 'fbp_HFI_class',
        'fuel_type', 'grouped_fuel_type', 'test_i',
    ]
    X = np.array(cc.getListedDataList(feature_columns)).astype(float)

    if len(X) != len(Y):
        raise ValueError(
            'feature/label length mismatch: {0} rows vs {1} labels'.format(
                len(X), len(Y)))

    # One seeded permutation applied to both arrays keeps every row paired
    # with its label.  It yields the same ordering as seeding and shuffling
    # X and Y separately, without depending on two shuffles drawing the same
    # random sequence.
    np.random.seed(3111)
    order = np.random.permutation(len(X))
    X = X[order]
    Y = Y[order]

    # 50% train, 25% validation, 25% test.
    train_end = len(X) // 2
    val_end = len(X) * 3 // 4
    trainX, valX, testX = X[:train_end], X[train_end:val_end], X[val_end:]
    trainY, valY, testY = Y[:train_end], Y[train_end:val_end], Y[val_end:]

    if run:
        for r in range(numruns):
            print(
                ('Running on train={0}, val={1}, test={2} samples for run {3}'
                 ).format(trainX.shape[0], valX.shape[0], testX.shape[0], r))

            for p, params in enumerate(parameters):
                for learnername, learner in classalgs.items():
                    # Start from a clean learner for every parameter setting.
                    learner.reset(params)
                    print('Running learner = ' + learnername +
                          ' on parameters ' + str(learner.getparams()))

                    learner.learn(trainX, trainY, valX, valY, testX, testY)

                    predictions = learner.predict(testX)
                    acc = utils.getaccuracy(testY, predictions)
                    print('accuracy for ' + learnername + ': ' + str(acc))
                    accuracy[learnername][p, r] = acc

    if plot:
        print("PLOT!")
        # NOTE(review): presumably learning_acc.pkl is written during
        # learner.learn - confirm.  Unpickling can execute arbitrary code, so
        # only load a file produced by a trusted run.
        with open("learning_acc.pkl", "rb") as curve_file:
            (accuracy_val, accuracy_test, accuracy_train,
             _best_accuracy, _best_weight) = pickle.load(curve_file)
        print("best_accuracy : val,train,test", accuracy_val, accuracy_train,
              accuracy_test)

        epi = np.arange(0, num_steps, 1)
        plt.plot(epi, accuracy_val, label='validation accuracy : 1')
        plt.plot(epi, accuracy_test, label='test accuracy : 2')
        plt.plot(epi, accuracy_train, label='train accuracy : 3')
        plt.xlabel('epochs')
        plt.ylabel('Accuracy %')
        plt.legend()
        plt.show()
def geterror(ytest, predictions): return 100.0 - getaccuracy(ytest, predictions) if __name__ == '__main__': trainsize = 70000 testsize = 30000 numruns = 3 classalgs = { # 'Random': algs.Classifier(), # 'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}), # 'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}), # 'Linear Regression': algs.LinearRegressionClass(), 'Logistic Regression': algs.LogitReg(), 'Radial Basis Transformation': algs.LogitReg(), # 'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}), # 'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}), # 'ElasticNet Logistic Regression': algs.LogitReg({'regularizer': 'elasticNet'}), # 'Logistic Alternative': algs.LogitRegAlternative(), # 'Neural Network': algs.NeuralNet({'epochs': 100, 'stepsize': 0.01, 'nh': 8, 'ni': 19}) } numalgs = len(classalgs) parameters = ( # {'regwgt': 0.0, 'nh': 4}, { 'regwgt': 0.01, 'nh': 8 },
def geterror(ytest, predictions): return (100.0 - getaccuracy(ytest, predictions)) if __name__ == '__main__': trainsize = 5000 testsize = 5000 numruns = 10 classalgs = { 'Random': algs.Classifier(), 'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}), 'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}), 'Linear Regression': algs.LinearRegressionClass(), 'Logistic Regression': algs.LogitReg(), 'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}), 'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}), 'Logistic Alternative': algs.LogitRegAlternative(), 'Neural Network': algs.NeuralNet({'epochs': 100}) } numalgs = len(classalgs) parameters = ( { 'regwgt': 0.0, 'nh': 4 }, { 'regwgt': 0.01, 'nh': 8
obj = [] #trainset, testset = loadmadelon() print('Running on train={0} and test={1} samples').format( trainset[0].shape[0], testset[0].shape[0]) nnparams = {'ni': trainset[0].shape[1], 'nh': 64, 'no': 1} """type parameter should be L1,L2,None or Other""" """regwt should be user defined parameter""" lrparms = {'regwt': 0, 'type': "None"} classalgs = { 'Random': algs.Classifier(), 'Linear Regression': algs.LinearRegressionClass(), 'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}), 'Naive Bayes Ones': algs.NaiveBayes(), 'My Classifier': algs.MyClassifier(), 'Logistic Regression': algs.LogitReg(lrparms), 'Neural Network': algs.NeuralNet(nnparams) } classalgs1 = collections.OrderedDict(sorted(classalgs.items())) for learnername, learner in classalgs1.iteritems(): print 'Running learner = ' + learnername # Train model if learnername == "Linear Regression": lobj = learner if learnername == "Logistic Regression": learner.learn(trainset[0], trainset[1], lobj) else:
testsize = 5000 numruns = 10 classalgs = { 'Random': algs.Classifier(), #'Naive Bayes': algs.NaiveBayes({'usecolumnones': False}), #'Naive Bayes Ones': algs.NaiveBayes({'usecolumnones': True}), 'Linear Regression': algs.LinearRegressionClass(), #'Logistic Regression': algs.LogitReg(), #'L1 Logistic Regression': algs.LogitReg({'regularizer': 'l1'}), #'L2 Logistic Regression': algs.LogitReg({'regularizer': 'l2'}), #'Logistic Alternative': algs.LogitRegAlternative(), #'Neural Network': algs.NeuralNet({'epochs': 100}) #'RBF_linearRegression ': algs.RBF_linearRegression(), 'RBF_LogitReg': algs.RBF_LogitReg(), 'LogitReg': algs.LogitReg() } numalgs = len(classalgs) parameters = ({ 'beta': 0.5 }, #{'beta':1.0}, #{'beta':2.0} ) numparams = len(parameters) errors = {} for learnername in classalgs: errors[learnername] = np.zeros((numparams, numruns))
"""The choice of the number of folds should be user-input""" fold=10 trainlabel=np.reshape(trainset[1],(-1,1)) trset = np.hstack((trainset[0],trainlabel)) numinputs = trset.shape[1]-1 np.random.shuffle(trset) parts = [trset[i::fold] for i in xrange(fold)] obj=[] print('Running on train={0} and test={1} samples').format(trainset[0].shape[0], testset[0].shape[0]) parm_pass={'Neural Network':{'ni': trset.shape[1]-1, 'nh': 0, 'no': 1}, 'Logistic Regression':{'regwt':0,'type':"L2"}} classalgs = {'Linear Regression': algs.LinearRegressionClass(), 'Naive Bayes Ones': algs.NaiveBayes(), 'Logistic Regression': algs.LogitReg(parm_pass['Logistic Regression']), 'Neural Network': algs.NeuralNet(parm_pass['Neural Network']) } classalgs1 = collections.OrderedDict(sorted(classalgs.items())) best_parm=[] for learnername , learner in classalgs1.iteritems(): print 'Running learner = ' + learnername # # Train model parm_accuracy={} for j in range(0,len(parm_dict[learnername])):