# Project-local helper modules (dataUtils, descriptors, BoW, SVMClassifiers,
# Evaluation, graphs, PCA_computing, fisherVectors) are assumed importable
# alongside these session launchers.
import time

import numpy as np
import matplotlib.pyplot as plt

import dataUtils
import descriptors
import BoW
import SVMClassifiers
import Evaluation
import graphs
import PCA_computing
import fisherVectors


def launchsession2(num_slots, descriptor_type, randomSplits, levels_pyramid,
                   useKernelInter, useKernelPyr, rocCurveCM):
    start = time.time()

    # Read the train and test files
    train_images_filenames, test_images_filenames, train_labels, test_labels = dataUtils.readData()

    # Divide training into training and validation splits
    train_percentage = 0.7  # 70% training, 30% validation
    if randomSplits:
        TrainingSplit, ValidationSplit = dataUtils.getRandomTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)
    else:
        TrainingSplit, ValidationSplit = dataUtils.getTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)

    # Get descriptors D
    if levels_pyramid != 0:
        D, Train_descriptors, Train_label_per_descriptor, Train_keypoints, Train_image_size = \
            descriptors.extractFeaturesPyramid(TrainingSplit, descriptor_type, num_slots)
    else:
        D, Train_descriptors, Train_label_per_descriptor = descriptors.extractFeatures(
            TrainingSplit, descriptor_type, num_slots)

    # Compute bag of words using k-means and save the codebook
    k = 512
    codebook = BoW.computeCodebook(D, k)

    # Determine visual words
    if levels_pyramid != 0:
        visual_words = BoW.getVisualWordsSpatialPyramid(
            codebook, k, Train_descriptors, Train_image_size, Train_keypoints, levels_pyramid)
    else:
        visual_words = BoW.getVisualWords(codebook, k, Train_descriptors)

    # Train the SVM classifier
    if useKernelInter or useKernelPyr:
        # Histogram intersection kernel (optionally the spatial-pyramid variant)
        clf, stdSlr, train_scaled = SVMClassifiers.trainSVMKernel(
            visual_words, Train_label_per_descriptor, useKernelPyr,
            levels_pyramid, Cparam=1, probabilities=rocCurveCM)
    else:
        clf, stdSlr = SVMClassifiers.trainSVM(
            visual_words, Train_label_per_descriptor, Cparam=1,
            kernel_type='linear', probabilities=rocCurveCM)

    # For the test set
    if useKernelInter or useKernelPyr:
        predictedLabels2 = SVMClassifiers.predictKernel(
            test_images_filenames, descriptor_type, clf, stdSlr, train_scaled,
            k, codebook, levels_pyramid, num_slots)
        accuracy2 = Evaluation.computeAccuracyOld(predictedLabels2, test_labels)
        print 'Final Kernel intersection test accuracy: ' + str(accuracy2)
    else:
        # Get all the test data and predict their labels
        predictedLabels = SVMClassifiers.predict(
            test_images_filenames, descriptor_type, stdSlr, codebook, k,
            levels_pyramid, num_slots)
        # Compute accuracy
        accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels, test_labels)
        print 'Final test accuracy: ' + str(accuracy)

    # For the validation set
    validation_images_filenames, validation_labels = dataUtils.unzipTupleList(ValidationSplit)
    if useKernelInter or useKernelPyr:
        predictedLabels2 = SVMClassifiers.predictKernel(
            validation_images_filenames, descriptor_type, clf, stdSlr,
            train_scaled, k, codebook, levels_pyramid, num_slots)
        accuracy2 = Evaluation.computeAccuracyOld(predictedLabels2, validation_labels)
        print 'Final Kernel intersection validation accuracy: ' + str(accuracy2)
    else:
        # Get all the validation data and predict their labels
        predictedLabels = SVMClassifiers.predict(
            validation_images_filenames, descriptor_type, stdSlr, codebook, k,
            levels_pyramid, num_slots)
        # Compute accuracy
        validation_accuracy = Evaluation.getMeanAccuracy(
            clf, predictedLabels, validation_labels)
        print 'Final validation accuracy: ' + str(validation_accuracy)

        # ROC curve and confusion matrix
        if rocCurveCM:
            graphs.rcurve(predictedLabels, validation_labels, clf)
            graphs.plot_confusion_matrix(clf, validation_labels,
                                         stdSlr.transform(predictedLabels),
                                         normalize=False,
                                         title='Confusion matrix',
                                         cmap=plt.cm.Blues)

    end = time.time()
    print 'Done in ' + str(end - start) + ' secs.'
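# SVMClassifiers.trainSVMKernel above relies on a histogram intersection kernel
# implemented elsewhere in the project. For illustration only (a sketch, not
# the project's actual code), a precomputed intersection kernel over BoVW
# histograms can be written like this and fed to scikit-learn's SVC:

def intersection_kernel_sketch(X, Y):
    """K[i, j] = sum_d min(X[i, d], Y[j, d]) for histogram rows X and Y."""
    K = np.zeros((X.shape[0], Y.shape[0]), dtype=np.float64)
    for i in range(X.shape[0]):
        # Broadcast row i of X against every row of Y and sum the minima
        K[i, :] = np.minimum(Y, X[i]).sum(axis=1)
    return K

# Usage sketch (train_hist/test_hist are hypothetical visual-word histograms):
#   from sklearn.svm import SVC
#   clf = SVC(kernel='precomputed')
#   clf.fit(intersection_kernel_sketch(train_hist, train_hist), train_labels)
#   preds = clf.predict(intersection_kernel_sketch(test_hist, train_hist))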
def launchsession4(layer_taken, randomSplits, k, useServer, method_used):
    start = time.time()

    # Read the train and test files
    if useServer:
        train_images_filenames, test_images_filenames, train_labels, test_labels = dataUtils.readServerData()
    else:
        train_images_filenames, test_images_filenames, train_labels, test_labels = dataUtils.readData()

    # Divide training into training and validation splits
    train_percentage = 0.7  # 70% training, 30% validation
    if randomSplits:
        TrainingSplit, ValidationSplit = dataUtils.getRandomTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)
    else:
        TrainingSplit, ValidationSplit = dataUtils.getTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)

    # Obtain information from the VGG ConvNet
    CNN_base_model = descriptors.getBaseModel()  # Base model

    # Compute features
    print 'Extracting features'
    D, Train_descriptors, Train_label_per_descriptor = descriptors.extractFeaturesMaps(
        TrainingSplit, layer_taken, CNN_base_model, method_used)

    # Fully-connected/flatten features (or pooled feature maps) are fed to the
    # SVM directly; only the raw convolutional feature maps go through BoVW
    not_use_BoW_other_layers = (method_used['method_to_reduce_dim'] == 'Average'
                                or method_used['method_to_reduce_dim'] == 'Max')
    use_features_directly = (layer_taken in ('fc1', 'fc2', 'flatten')
                             or not_use_BoW_other_layers)

    if use_features_directly:
        visual_words = D
        codebook = None
    else:
        if method_used['usePCA'] > 0:
            print 'Applying PCA'
            D, Train_descriptors, pca = PCA_computing.PCA_to_data(
                D, Train_descriptors, method_used['usePCA'])
        else:
            pca = None
        # Compute bag of words using k-means and save the codebook
        codebook = BoW.computeCodebook(D, k)
        # Determine visual words
        visual_words = BoW.getVisualWords(codebook, k, Train_descriptors)

    # Train a linear SVM classifier
    clf, stdSlr = SVMClassifiers.trainSVM(visual_words, Train_label_per_descriptor,
                                          Cparam=1, kernel_type='linear')

    # For the test set
    TestSplit = zip(test_images_filenames, test_labels)
    if use_features_directly:
        # Not using BoVW
        predictedLabels = SVMClassifiers.predict(TestSplit, layer_taken, stdSlr,
                                                 clf, CNN_base_model, method_used)
        accuracy = Evaluation.computeAccuracyOld(predictedLabels, test_labels)
    else:
        # BoVW
        predictedLabels = SVMClassifiers.predictBoVW(TestSplit, layer_taken, stdSlr,
                                                     codebook, k, CNN_base_model,
                                                     pca, method_used)
        accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels, test_labels)
    print 'Final test accuracy: ' + str(accuracy)

    # For the validation set
    validation_images_filenames, validation_labels = dataUtils.unzipTupleList(ValidationSplit)
    if use_features_directly:
        # Not using BoVW
        predictedLabels = SVMClassifiers.predict(ValidationSplit, layer_taken, stdSlr,
                                                 clf, CNN_base_model, method_used)
        validation_accuracy = Evaluation.computeAccuracyOld(predictedLabels, validation_labels)
    else:
        # BoVW
        predictedLabels = SVMClassifiers.predictBoVW(ValidationSplit, layer_taken, stdSlr,
                                                     codebook, k, CNN_base_model,
                                                     pca, method_used)
        validation_accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels, validation_labels)
    print 'Final validation accuracy: ' + str(validation_accuracy)

    end = time.time()
    print 'Done in ' + str(end - start) + ' secs.'
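# BoW.computeCodebook and BoW.getVisualWords live in the project's BoW module.
# A minimal sketch of that pattern with scikit-learn (an assumption for
# illustration, not the project's implementation):

from sklearn.cluster import MiniBatchKMeans

def compute_codebook_sketch(D, k):
    """Cluster the stacked (n_descriptors, dim) matrix D into k visual words."""
    codebook = MiniBatchKMeans(n_clusters=k, batch_size=20 * k)
    codebook.fit(D)
    return codebook

def get_visual_words_sketch(codebook, k, descriptors_per_image):
    """Build one k-bin histogram of visual-word assignments per image."""
    words = np.zeros((len(descriptors_per_image), k), dtype=np.float32)
    for i, des in enumerate(descriptors_per_image):
        assignments = codebook.predict(des)
        words[i, :] = np.bincount(assignments, minlength=k)
    return words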
def launchsession3(num_slots, descriptor_type, randomSplits, levels_pyramid, usePCA):
    start = time.time()

    # Read the train and test files
    train_images_filenames, test_images_filenames, train_labels, test_labels = dataUtils.readData()

    # Divide training into training and validation splits
    train_percentage = 0.7  # 70% training, 30% validation
    if randomSplits:
        TrainingSplit, ValidationSplit = dataUtils.getRandomTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)
    else:
        TrainingSplit, ValidationSplit = dataUtils.getTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)

    # Get descriptors D
    if levels_pyramid > 0:
        D, Train_descriptors, Train_label_per_descriptor, Train_keypoints, Train_image_size = \
            descriptors.extractFeaturesPyramid(TrainingSplit, descriptor_type, num_slots)
    else:
        D, Train_descriptors, Train_label_per_descriptor = descriptors.extractFeatures(
            TrainingSplit, descriptor_type, num_slots)

    if usePCA > 0:
        print 'Applying PCA'
        D, Train_descriptors, pca = PCA_computing.PCA_to_data(D, Train_descriptors, usePCA)
    else:
        pca = None

    # Computing the GMM
    k = 64  # short codebooks (32, 64, ...)
    gmm = fisherVectors.getGMM(D, k)

    # The Fisher-vector computation expects float32 descriptors
    for idx, TrainDes in enumerate(Train_descriptors):
        train_descriptor = np.float32(TrainDes)
        Train_descriptors[idx] = train_descriptor

    if levels_pyramid > 0:
        fisher = fisherVectors.getFisherVectorsSpatialPyramid(
            Train_descriptors, k, gmm, Train_image_size, Train_keypoints, levels_pyramid)
    else:
        fisher = fisherVectors.getFisherVectors(Train_descriptors, k, gmm)

    # Power-normalization
    #fisher = fisherVectors.powerNormalization(fisher)
    # L2 normalize
    fisher = fisherVectors.normalizeL2(fisher)

    # Train a linear SVM classifier
    clf, stdSlr = SVMClassifiers.trainSVM(fisher, Train_label_per_descriptor,
                                          Cparam=1, kernel_type='linear')

    # For the test set
    # Get all the test data and predict their labels
    predictedLabels = SVMClassifiers.predict(test_images_filenames, descriptor_type,
                                             stdSlr, gmm, k, levels_pyramid,
                                             num_slots, pca)
    # Compute accuracy
    accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels, test_labels)
    print 'Final test accuracy: ' + str(accuracy)

    # For the validation set
    validation_images_filenames, validation_labels = dataUtils.unzipTupleList(ValidationSplit)
    # Get all the validation data and predict their labels
    predictedLabels = SVMClassifiers.predict(validation_images_filenames, descriptor_type,
                                             stdSlr, gmm, k, levels_pyramid,
                                             num_slots, pca)
    # Compute accuracy
    validation_accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels, validation_labels)
    print 'Final validation accuracy: ' + str(validation_accuracy)

    end = time.time()
    print 'Done in ' + str(end - start) + ' secs.'
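# fisherVectors.powerNormalization and fisherVectors.normalizeL2 are project
# helpers. The standard "improved Fisher vector" normalizations they are named
# after are short enough to sketch here (assumed implementations, not the
# project's code):

def power_normalization_sketch(X, alpha=0.5):
    """Signed power normalization: sign(x) * |x|**alpha (alpha=0.5 is the usual signed square root)."""
    return np.sign(X) * np.abs(X) ** alpha

def normalize_l2_sketch(X, eps=1e-12):
    """Row-wise L2 normalization, guarding against all-zero rows."""
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    return X / np.maximum(norms, eps)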
    return model


def catboost_predict(model, df):
    col = df.columns.tolist()
    test_pool = Pool(df, cat_features=[col.index(i) for i in cat_colums])
    return model.predict(test_pool)


def prepare_submission(test_merged, array):
    sub_df = pd.DataFrame(columns=['id', 'num_orders'])
    sub_df['id'] = test_merged['id'].values
    sub_df['num_orders'] = array
    # sub_df['num_orders_tree'] = arrayt
    # sub_df['avg'] = np.mean(array + arrayt, axis=0)
    sub_df.to_csv('sub_tree.csv', index=False)


if __name__ == '__main__':
    train_merged, test_merged = mergeData(*readData())
    catProcess = Categorify(cat_colums, numeric_cols)
    catProcess.apply_train(train_merged)
    catProcess.apply_test(test_merged)
    # print(train_merged.head())
    # catmodel = catboost_train(train_merged[numeric_cols + cat_colums],
    #                           train_merged['num_orders'].astype('float32'))
    # pred = catboost_predict(catmodel, test_merged[numeric_cols + cat_colums])
    # prepare_submission(test_merged, pred)
    rf, xgbr = train_trees(train_merged[numeric_cols + cat_colums],
                           train_merged['num_orders'].astype('float32'))
    df = predict_trees([rf, xgbr], test_merged[numeric_cols + cat_colums])
    df.to_csv('res.csv')
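# Categorify, train_trees and predict_trees are defined earlier in this script.
# The Categorify pattern (fit label encodings on the train set, reuse them on
# the test set) is roughly the following; a sketch under that assumption, not
# the original class:

class CategorifySketch(object):
    def __init__(self, cat_cols, num_cols):
        self.cat_cols = cat_cols
        self.num_cols = num_cols
        self.mappings = {}

    def apply_train(self, df):
        # Learn a category -> integer code mapping per categorical column
        for c in self.cat_cols:
            df[c] = df[c].astype('category')
            self.mappings[c] = {cat: code for code, cat in enumerate(df[c].cat.categories)}
            df[c] = df[c].cat.codes

    def apply_test(self, df):
        # Reuse the train-time mappings; categories unseen at train time get -1
        for c in self.cat_cols:
            df[c] = df[c].map(self.mappings[c]).fillna(-1).astype('int64')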
    for res in estimator.predict(input_fn=inp_fn):
        predicted.append(res['predictions'])
    return np.array(predicted).ravel()


def prepare_submission(test_merged, array, arrayt):
    sub_df = pd.DataFrame(columns=['id', 'num_orders'])
    sub_df['id'] = test_merged['id'].values
    sub_df['num_orders'] = array
    sub_df['num_orders_tree'] = arrayt
    # Element-wise average of the two models' predictions
    # (np.mean(array + arrayt, axis=0) would collapse to a single scalar)
    sub_df['avg'] = np.mean([array, arrayt], axis=0)
    sub_df.to_csv('sub_tf4.csv', index=False)


if __name__ == '__main__':
    train, test, center_info, meal_info = readData()
    train_merged, test_merged = mergeData(train, test, center_info, meal_info)
    dense_feat, lin_feat = makeFeatureColum(train_merged)
    bucket_colum, indicator_column = makeFeaturesForTrees()
    estimator = buildEstimator(dense_feat, lin_feat)
    treestimator = buildTreeEstimator(bucket_colum + indicator_column)
    train_inp_fn = estInpFunc()
    test_inp_fn = estInpFunc(train=False)
    # train
    estimator = train_estimator(estimator, train_inp_fn)
    treestimator = train_estimator(treestimator, train_inp_fn)
    # test
    arrayt = predict_using_trained_estimator(treestimator, test_inp_fn)
    array = predict_using_trained_estimator(estimator, test_inp_fn)
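# estInpFunc, buildEstimator, etc. are defined earlier in this file. With TF 1.x
# estimators, a pandas-backed input function is commonly built like the sketch
# below (hypothetical, assuming `import tensorflow as tf` at the top of the
# script and the numeric_cols/cat_colums lists used above):

def est_inp_fn_sketch(df, label_col='num_orders', train=True):
    return tf.estimator.inputs.pandas_input_fn(
        x=df[numeric_cols + cat_colums],
        y=df[label_col].astype('float32') if train else None,
        batch_size=256,
        num_epochs=None if train else 1,  # repeat indefinitely while training
        shuffle=train)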
""" Project 1 At the end you should see something like this Step Count:1000 Training accuracy: 0.8999999761581421 loss: 0.42281264066696167 Test accuracy: 0.8199999928474426 loss: 0.4739704430103302 play around with your model to try and get an even better score """ import tensorflow as tf import dataUtils training_data, training_labels = dataUtils.readData("project1trainingdata.csv") test_data, test_labels = dataUtils.readData("project1testdata.csv") # Build tensorflow blueprint # Tensorflow placeholder input_placeholder = tf.placeholder(tf.float32, shape=[None, 113]) # Neural network hidden layers w1 = tf.get_variable("w1", shape=[113, 150], initializer=tf.contrib.layers.xavier_initializer()) b1 = tf.get_variable("12", shape=[150], initializer=tf.contrib.layers.xavier_initializer()) hidden_layer_1 = tf.nn.dropout(tf.layers.batch_normalization( tf.nn.relu(tf.matmul(input_placeholder, w1) + b1), axis=1, center=True,