def train_converted_text(svm_file, text_converter, grid_arguments='0',
                         feature_arguments='', train_arguments=''):
    """Train on an already converted SVM file and return a TextModel.

    Arguments:
        svm_file: Path of the training file, passed unchanged to
            ``find_parameters`` and ``train``.
        text_converter: Converter object stored in the returned TextModel.
        grid_arguments: ``'0'``/``0`` skips the parameter search. ``'1'``/``1``
            runs it with the default grid options. Any other value is
            appended to the default grid options.
        feature_arguments: Extra options forwarded to the parameter search
            and to ``train``.
        train_arguments: Extra training options; ``'-s 4 '`` is always
            prepended and, after a parameter search, ``' -c <best c>'`` is
            appended.

    Returns:
        ``TextModel(text_converter, m)`` where ``m`` is the result of
        ``train``.

    Raises:
        OSError: if the learner script cannot be made executable when the
            default grid search (``grid_arguments == '1'``) is requested.
    """
    train_arguments = '-s 4 ' + train_arguments

    if grid_arguments != '0' and grid_arguments != 0:
        # Resolve the learner script next to this module.  The path is made
        # absolute first: a bare ``dirname(__file__) + '/learner/...'`` turns
        # into the root path '/learner/...' when dirname() is empty.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        grid_multi_path = os.path.join(module_dir, 'learner', 'learner_impl.py')
        default_grid_arguments = (
            '-svmtrain {0} -log2g null -log2c -6,6,2 '.format(grid_multi_path))

        if grid_arguments == '1' or grid_arguments == 1:
            # NOTE(review): the script is only chmod-ed on this branch, not
            # when custom grid arguments are supplied -- confirm intended.
            try:
                os.chmod(grid_multi_path, stat.S_IRWXU)  # Execute by owner.
            except OSError:
                raise OSError("Grid search needs sudo")
            grid_arguments = default_grid_arguments
        else:
            grid_arguments = default_grid_arguments + grid_arguments

        search_options = (
            grid_arguments + ' ' + train_arguments + ' ' + feature_arguments)
        # find_parameters returns a sequence whose second item is the
        # parameter dict; only its "c" entry is used here.
        parameters = find_parameters(svm_file, search_options)[1]
        train_arguments = train_arguments + ' -c ' + str(parameters["c"])

    m = train(svm_file, feature_arguments, train_arguments)
    return TextModel(text_converter, m)
def ParameterOptimization(alg_type_list, test_file_list):
    """Return the grid-searched training parameters for every algorithm type.

    When ``IsModify(test_file_list)`` reports a change, the parameters are
    recomputed with ``grid.find_parameters`` on
    ``./traindata/<type>_train_data`` and cached in
    ``train_param/<type>train_param``.  Otherwise the cached files are read
    back.

    Arguments:
        alg_type_list: Iterable of algorithm type names (strings); used to
            build the training-data and cache file names.
        test_file_list: Passed unchanged to ``IsModify``.

    Returns:
        A list with one entry per algorithm type.  Each entry is the list of
        parameter values as strings, in the order the parameter dict
        yielded them when they were computed.
    """
    CreateNewDirectory('train_param')
    train_param_list = []

    # If the test data were modified, re-compute the best parameters.
    # The explicit comparison with True is kept on purpose: the return type
    # of IsModify is not visible here.
    if IsModify(test_file_list) == True:
        options = [
            '-svmtrain', '/usr/bin/svm-train',
            '-gnuplot', '/usr/bin/gnuplot',
            '-log2c', '-5,5,1',
            '-log2g', '-4,0,1',
            '-log2p', '-5,5,1',
            '-s', '3',
            '-t', '2',
            '-v', '5',
            '-m', '300',
        ]
        for alg_type in alg_type_list:
            filename = './traindata/' + alg_type + '_train_data'
            ret_best_params = grid.find_parameters(filename, options)
            # Same text as the old two-argument print statement.
            print(' '.join(['ret_best_param : ', str(ret_best_params)]))

            # Serialise the values as "v1, v2, ..." -- the text that
            # str(list_of_values).strip('[] ') used to produce.
            tmp = ', '.join(repr(value)
                            for value in ret_best_params[1].values())
            with open('train_param/' + alg_type + 'train_param', 'w') as param_file:
                param_file.write(tmp)
            train_param_list.append(tmp.split(', '))
    else:
        for alg_type in alg_type_list:
            with open('train_param/' + alg_type + 'train_param', 'r') as param_file:
                tmp = str(param_file.readline())
            train_param_list.append(tmp.split(', '))

    print(' '.join(['all train_param_list : ', str(train_param_list)]))
    return train_param_list
def _write_libsvm_file(path, labels, vectors):
    """Write labels and dense feature vectors to *path* in LIBSVM text format."""
    with open(path, "wb") as out_file:
        for label, vector in zip(labels, vectors):
            fields = [str(label)]
            fields.extend(str(index + 1) + ":" + str(value)
                          for index, value in enumerate(vector))
            # Every field, including the last one, is followed by a space.
            out_file.write((" ".join(fields) + " \n").encode("ascii"))


def classify(images, classes_list, train_set, test_set, pos_fold, descriptor,
             parameters):
    """
    Performs the classification of the test_set according to the train_set
    by calling the external svm-train / svm-predict programs.

    Input:
        - images: Python dictionary with all images from the dataset and
        its feature vector.
        - classes_list: List with all classes of the dataset; used to fit
        the label encoder.
        - train_set: List of images in the train set.
        - test_set: List of images in the test set.
        - pos_fold: Identifier of the current fold; appended to the name of
        the model file.
        - descriptor: Name of the descriptor used in the extraction step
        (not used in this method).
        - parameters: Parameters of the libSVM method:
            - Kernel: kernel used in the SVM
            - C: Cost of the SVM (replaced by the grid-search result)
            - degree: degree of the Kernel function
            - gamma: gamma of the Kernel function (replaced by the
            grid-search result)
            - Cross-Validation: Number of folds in the cross-validation
            mode (parsed but not used in this method)
            - Probabilities: Get as predict the probabilities of each class

    Output:
        Tuple (test_set, list_class, list_result, classes, model_paths):
        - test_set: the test_set argument, unchanged.
        - list_class: class of each test image, as stored in images.
        - list_result: one list per test image with one value per class,
        ordered like classes (probabilities, or 1.0 for the predicted class
        and 0.0 elsewhere).
        - classes: label_encoder.classes_.
        - model_paths: list holding the path of the trained model file.
    """
    print("Classification: libSVM 3.17")

    #Get parameters
    libSVM_kernel = kernel(parameters['Kernel'])
    # C and gamma are still parsed so a malformed configuration fails
    # early, but both are overwritten by the grid search below.
    libSVM_c = float(parameters['C'])
    libSVM_degree = int(parameters['degree'])
    libSVM_gamma = float(parameters['gamma'])
    libSVM_cv = int(parameters['Cross-Validation'])  # validated, unused
    libSVM_probability = int(parameters['Probabilities'])

    #Paths
    dirname = os.path.abspath(os.path.join(os.path.dirname(__file__)))
    temp_path = os.path.abspath(os.path.join(dirname, "..", "..", "temp"))
    train_path = os.path.join(temp_path, "libSVM.train")
    model_path = os.path.join(temp_path, "libSVM.train.model_" + str(pos_fold))
    test_path = os.path.join(temp_path, "libSVM.test")
    output_path = os.path.join(temp_path, "libSVM.output")

    plugin_train = "svm-train"
    plugin_test = "svm-predict"
    # 32-bit Linux uses differently named executables.
    if platform.system() == 'Linux' and platform.architecture()[0] == '32bit':
        plugin_train += "_32l"
        plugin_test += "_32l"

    #Preprocess each class to a unique value to the classification
    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(classes_list)
    print("List of classes of this experiment:" + " " +
          str(label_encoder.classes_))

    #Collect class and feature vector of every train image
    train_classes = [images[img][POS_CLASSES][INDEX_ZERO] for img in train_set]
    list_train = numpy.array(
        [numpy.array(images[img][POS_FV][INDEX_ZERO]) for img in train_set])
    #Given a list of classes, transform each value in this list to a integer
    list_train_class = label_encoder.transform(numpy.array(train_classes))

    #Collect class and feature vector of every test image
    list_class = [images[img][POS_CLASSES][INDEX_ZERO] for img in test_set]
    list_test = numpy.array(
        [numpy.array(images[img][POS_FV][INDEX_ZERO]) for img in test_set])
    list_test_class = label_encoder.transform(numpy.array(list_class))

    #SVM Fit
    #-------------------------------------------------------------------------
    print("\tFit")
    _write_libsvm_file(train_path, list_train_class, list_train)

    #Grid-Search: the best C and gamma replace the configured ones
    _, param = grid.find_parameters(train_path, '-gnuplot null')
    libSVM_c = param['c']
    libSVM_gamma = param['g']

    # %.17g keeps the full precision of C and gamma; "%f" rounded small
    # gamma values (e.g. 2**-15) to six decimals.
    cmd_train = """ %s%s.%s%s -t %d -d %d -g %.17g -c %.17g -b %d "%s" "%s" """ % (
        dirname, os.sep, os.sep, plugin_train, libSVM_kernel, libSVM_degree,
        libSVM_gamma, libSVM_c, libSVM_probability, train_path, model_path)
    print(cmd_train)

    #Execute SVM-Train
    call(cmd_train, shell=True)
    print("\tEnd Fit")
    #-------------------------------------------------------------------------

    #Read the "label" header line of the libSVM model: it gives the order
    #of the classes in the probability output
    model_labels = None
    with open(model_path, "rb") as model_file:
        print(model_path)
        for line in model_file:
            fields = line.split()
            if fields and fields[0] == b"label":
                model_labels = [int(label) for label in fields[1:]]
                break

    model_paths = [model_path]

    #SVM Predict
    #-------------------------------------------------------------------------
    _write_libsvm_file(test_path, list_test_class, list_test)

    #Execute SVM-Predict
    cmd_test = """ %s%s.%s%s -b %d "%s" "%s" "%s" """ % (
        dirname, os.sep, os.sep, plugin_test, libSVM_probability, test_path,
        model_path, output_path)
    print(cmd_test)
    call(cmd_test, shell=True)
    print("\tEnd Predict")
    #-------------------------------------------------------------------------

    #Read output file to get the result of the predict
    list_result = []
    with open(output_path, "rb") as output_file:
        if libSVM_probability:
            if model_labels is None:
                raise RuntimeError(
                    "no 'label' line found in model file " + model_path)
            #The first line is a header, not a prediction
            output_file.readline()
            model_classes = list(label_encoder.inverse_transform(model_labels))
            for line in output_file:
                dict_classes = {key_class: 0.0 for key_class in classes_list}
                #First column is the predicted label, the rest are the
                #probabilities in the order of the model labels
                output_line = [float(value) for value in line.split()[1:]]
                for class_item, perc in zip(model_classes, output_line):
                    dict_classes[class_item] = perc
                list_result.append(
                    [dict_classes[key_class]
                     for key_class in label_encoder.classes_])
        else:
            list_predict = [int(line) for line in output_file]
            list_predict = label_encoder.inverse_transform(list_predict)
            for predict in list_predict:
                img_result = [0.0] * len(label_encoder.classes_)
                #Find all predict in the list label_encoder.classes_ and
                #grab the first index
                pos = numpy.where(label_encoder.classes_ == predict)[0][0]
                img_result[pos] = 1.0
                list_result.append(img_result)

    #Remove temporary files
    os.remove(train_path)
    os.remove(test_path)

    return test_set, list_class, list_result, label_encoder.classes_, \
           model_paths
import svmutil as sm
import numpy as np
import grid


def loadDataSet(fileName):
    """Load a tab-separated data set with two features and one label per line.

    Blank lines are skipped.

    Returns:
        (dataMat, labelMat): a list of ``[x0, x1]`` float pairs and the
        list of float labels, in file order.
    """
    dataMat = []
    labelMat = []
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A trailing or stray empty line is not a sample.
                continue
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat


def testdata(trainfilename, testfilename):
    """Train an SVM on one file and evaluate it on another.

    Returns:
        The ``(plabel, pacc, pval)`` result of ``svm_predict``.
    """
    dataArr1, labelArr1 = loadDataSet(trainfilename)
    dataArr2, labelArr2 = loadDataSet(testfilename)
    arg = ['-s', 0, '-t', 2, '-c', 0.031, '-g', 0.0078]
    model = sm.svm_train(labelArr1, dataArr1, arg)
    plabel, pacc, pval = sm.svm_predict(labelArr2, dataArr2, model)
    return plabel, pacc, pval


if __name__ == '__main__':
    trainfilename = 'C:/Users/XUEJW/Desktop/dataset/MLiA_SourceCode/machinelearninginaction/Ch06/testSetRBF.txt'
    testfilename = 'C:/Users/XUEJW/Desktop/dataset/MLiA_SourceCode/machinelearninginaction/Ch06/testSetRBF2.txt'
    #testdata(trainfilename,testfilename)
    grid.find_parameters(trainfilename)
prob_precomputed = svmutil.svm_problem(y1, x_train_precomputed, isKernel=True) # grid search for the best parameters print('grid searching...') grid_search = { 'linear': '-t 0 -log2c -5,15,2', 'polynomial': '-t 1 -log2c -5,15,2 -log2g -5,15,2 -log2r -3,5,2 -d 4', 'RBF': '-t 2 -log2c -5,15,2 -log2g -5,15,2', } grid_search_precomputed = {'linear+RBF': '-t 4 -log2c -5,15,2'} best_param = [] kernel_tab = [] kernel = [] for kernel_type, opts in grid_search.items(): rst, tab, col = grid.find_parameters(prob, opts) kernel.append(kernel_type) kernel_tab.append(pd.DataFrame(tab, columns=col)) best_param.append(rst) for kernel_type, opts in grid_search_precomputed.items(): rst, tab, col = grid.find_parameters(prob_precomputed, opts) kernel.append(kernel_type) kernel_tab.append(pd.DataFrame(tab, columns=col)) best_param.append(rst) for i in range(len(kernel_tab)): kernel_tab[i].to_csv('best param/' + kernel[i] + '.csv') print(kernel[i] + ':', end=' ') print(best_param[i]) '''
#Computers Confusion Matrix for i in range(len(true)): result[classInd[true[i]]][classInd[pred[i]]] += 1 return result #Read in Data trainRAD_y, trainRAD_x = svm_read_problem('./rad_d2') trainCust_y, trainCust_x = svm_read_problem('./cust_d2') testRAD_y, testRAD_x = svm_read_problem('./rad_d2.t') testCust_y, testCust_x = svm_read_problem('./cust_d2.t') #Perform Cross Validation to find Best Parameters print('INITIALIZING CROSS VALIDATION') bestRAD_acc, bestRAD_params = find_parameters('./rad_d2') print('Best Accuracy with RAD is:', bestRAD_acc, '\nWith C and y:', bestRAD_params) bestCust_acc, bestCust_params = find_parameters('./cust_d2') print('Best Accuracy with Custom is:', bestCust_acc, '\nWith C and y:', bestCust_params) #Perform SVM training print('TRAINING SVMs') parameter_string = '-s 0 -t 2 -g ' + str(bestRAD_params['g']) + ' -c ' + str( bestRAD_params['c']) mRAD = svm_train(trainRAD_y, trainRAD_x, parameter_string) parameter_string = '-s 0 -t 2 -g ' + str(bestCust_params['g']) + ' -c ' + str( bestCust_params['c']) mCust = svm_train(trainCust_y, trainCust_x, parameter_string)