def run_fgm(train_file, test_file, ds, ori_result_file):
    """Train and test the FGM executables once per feature budget.

    Budgets come from ``l1_def.get_lambda_list(ds, 'fgm')``.  When ``ds``
    contains ``'synthetic'`` the values are used as returned; otherwise each
    value ``lam`` is mapped to ``int(valid_dim * (1 - lam))`` and results
    that are not positive are dropped.

    Args:
        train_file: path passed to the training executable.
        test_file: path passed to the test executable.
        ds: dataset name; all outputs are written under ``./<ds>``.
        ori_result_file: base name of the result file; ``_<budget>`` is
            appended for every run.

    Returns:
        One row per budget: ``[0, error_rate, budget, train_seconds]``.
        Slot 0 is never filled in by this function.
    """
    trainer = exe_path.fgm_train
    tester = exe_path.fgm_test

    # make the result dir
    out_dir = './%s' % ds
    run_util.create_dir(out_dir)

    if 'synthetic' in ds:
        budgets = l1_def.get_lambda_list(ds, 'fgm')
    else:
        valid_dim = run_util.get_valid_dim(train_file)
        candidates = [int(valid_dim * (1 - lam))
                      for lam in l1_def.get_lambda_list(ds, 'fgm')]
        budgets = [dim for dim in candidates if dim > 0]

    rows = []
    for budget in budgets:
        model_path = out_dir + '/fgm_model%g' % budget
        predict_path = out_dir + '/fgm_predict%g' % budget
        result_path = out_dir + '/' + ori_result_file + '_%d' % budget

        # clear the file if it already exists
        with open(result_path, 'w'):
            pass

        train_cmd = (trainer + ' -s 12 -c 10 -B %d' % budget
                     + ' %s %s' % (train_file, model_path))
        # the test executable's stdout is appended to the result file
        test_cmd = tester + ' %s %s %s>> %s' % (
            test_file, model_path, predict_path, result_path)

        # train, measuring wall-clock time around the external command
        print(train_cmd)
        started = time.time()
        os.system(train_cmd)
        train_seconds = time.time() - started

        # predict
        print(test_cmd)
        os.system(test_cmd)
        error_rate = liblinear_util.parse_error_rate(result_path)

        rows.append([0, error_rate, budget, train_seconds])

    return rows
def run_fgm(train_file, test_file, ds, ori_result_file):
    """Run the FGM train/test executables for every feature budget of ``ds``.

    The budget list is ``l1_def.get_lambda_list(ds, 'fgm')`` for datasets
    whose name contains ``'synthetic'``.  For any other dataset every value
    ``lam`` of that list is turned into ``int(valid_dim * (1 - lam))`` and
    only strictly positive results are kept.

    Args:
        train_file: path passed to the training executable.
        test_file: path passed to the test executable.
        ds: dataset name; all outputs are written under ``./<ds>``.
        ori_result_file: base name of the result file; ``_<budget>`` is
            appended for every run.

    Returns:
        A list with one ``[0, error_rate, budget, train_seconds]`` row per
        budget.  The first slot is left at 0 by this function.
    """
    trainer = exe_path.fgm_train
    tester = exe_path.fgm_test

    # make the result dir
    out_dir = './%s' % ds
    run_util.create_dir(out_dir)

    if 'synthetic' in ds:
        budgets = l1_def.get_lambda_list(ds, 'fgm')
    else:
        valid_dim = run_util.get_valid_dim(train_file)
        candidates = [int(valid_dim * (1 - lam))
                      for lam in l1_def.get_lambda_list(ds, 'fgm')]
        budgets = [dim for dim in candidates if dim > 0]

    rows = []
    for budget in budgets:
        model_path = out_dir + '/fgm_model%g' % budget
        predict_path = out_dir + '/fgm_predict%g' % budget
        result_path = out_dir + '/' + ori_result_file + '_%d' % budget

        # clear the file if it already exists
        with open(result_path, 'w'):
            pass

        train_cmd = (trainer + ' -s 12 -c 10 -B %d' % budget
                     + ' %s %s' % (train_file, model_path))
        # the test executable's stdout is appended to the result file
        test_cmd = tester + ' %s %s %s>> %s' % (
            test_file, model_path, predict_path, result_path)

        # train, measuring wall-clock time around the external command
        print(train_cmd)
        started = time.time()
        os.system(train_cmd)
        train_seconds = time.time() - started

        # predict
        print(test_cmd)
        os.system(test_cmd)
        error_rate = liblinear_util.parse_error_rate(result_path)

        rows.append([0, error_rate, budget, train_seconds])

    return rows
def run_liblinear(train_file, test_file, ds, result_file):
    """Train and test liblinear once, without extra command-line options.

    Args:
        train_file: path passed to the training executable.
        test_file: path passed to the test executable.
        ds: dataset name; outputs go under ``./<ds>/liblinear``.
        result_file: file name (inside that folder) that receives the test
            executable's stdout.

    Returns:
        A single-row list ``[[0, error_rate, sparsity, train_seconds]]``
        where ``sparsity`` is ``100 - model_size * 100.0 / valid_dim``.
        Slot 0 is never filled in by this function.
    """
    # NOTE(review): nothing below reads or writes ./tmp; kept because the
    # directory creation is an observable side effect of this function.
    os.system('mkdir ./tmp')

    ds_dir = './%s' % ds
    work_dir = ds_dir + '/liblinear'
    run_util.create_dir(work_dir)

    model_path = work_dir + '/model'
    predict_path = work_dir + '/tmp.txt'
    trainer = exe_path.liblinar_train_exe_name
    tester = exe_path.liblinar_test_exe_name

    # make the result dir
    run_util.create_dir(ds_dir)

    # work_dir already starts with './', so the path ends up with a doubled
    # './' prefix; that is harmless and matches the previous behavior
    result_path = './%s' % work_dir + '/' + result_file

    # clear the file if it already exists
    with open(result_path, 'w'):
        pass

    train_cmd = trainer + ' %s %s' % (train_file, model_path)
    # the test executable's stdout is appended to the result file
    test_cmd = tester + ' %s %s %s>> %s' % (
        test_file, model_path, predict_path, result_path)

    # train, measuring wall-clock time around the external command
    print(train_cmd)
    started = time.time()
    os.system(train_cmd)
    train_seconds = time.time() - started

    # predict
    print(test_cmd)
    os.system(test_cmd)
    error_rate = liblinear_util.parse_error_rate(result_path)

    valid_dim = run_util.get_valid_dim(train_file)
    model_size = liblinear_util.get_model_size(model_path)
    sparsity = 100 - (model_size * 100.0 / valid_dim)

    return [[0, error_rate, sparsity, train_seconds]]
def run_liblinear(train_file, test_file, ds, result_file):
    """Run one liblinear train/test cycle and report error and sparsity.

    Args:
        train_file: path passed to the training executable.
        test_file: path passed to the test executable.
        ds: dataset name; outputs go under ``./<ds>/liblinear``.
        result_file: file name (inside that folder) that receives the test
            executable's stdout.

    Returns:
        A single-row list ``[[0, error_rate, sparsity, train_seconds]]``
        where ``sparsity`` is ``100 - model_size * 100.0 / valid_dim``.
        Slot 0 is never filled in by this function.
    """
    # NOTE(review): nothing below reads or writes ./tmp; kept because the
    # directory creation is an observable side effect of this function.
    os.system('mkdir ./tmp')

    ds_dir = './%s' % ds
    work_dir = ds_dir + '/liblinear'
    run_util.create_dir(work_dir)

    model_path = work_dir + '/model'
    predict_path = work_dir + '/tmp.txt'
    trainer = exe_path.liblinar_train_exe_name
    tester = exe_path.liblinar_test_exe_name

    # make the result dir
    run_util.create_dir(ds_dir)

    # work_dir already starts with './', so the path ends up with a doubled
    # './' prefix; that is harmless and matches the previous behavior
    result_path = './%s' % work_dir + '/' + result_file

    # clear the file if it already exists
    with open(result_path, 'w'):
        pass

    train_cmd = trainer + ' %s %s' % (train_file, model_path)
    # the test executable's stdout is appended to the result file
    test_cmd = tester + ' %s %s %s>> %s' % (
        test_file, model_path, predict_path, result_path)

    # train, measuring wall-clock time around the external command
    print(train_cmd)
    started = time.time()
    os.system(train_cmd)
    train_seconds = time.time() - started

    # predict
    print(test_cmd)
    os.system(test_cmd)
    error_rate = liblinear_util.parse_error_rate(result_path)

    valid_dim = run_util.get_valid_dim(train_file)
    model_size = liblinear_util.get_model_size(model_path)
    sparsity = 100 - (model_size * 100.0 / valid_dim)

    return [[0, error_rate, sparsity, train_seconds]]
def run_liblinear(train_file, test_file, ds, ori_result_file):
    """Train and test liblinear once per ``-c`` value configured for ``ds``.

    The values come from ``l1_def.get_lambda_list(ds, 'liblinear')`` and are
    passed to the training executable as ``-s 5 -c <value>``.

    Args:
        train_file: path passed to the training executable.
        test_file: path passed to the test executable.
        ds: dataset name; all outputs are written under ``./<ds>``.
        ori_result_file: base name of the result file; ``_<c>`` (formatted
            with ``%f``) is appended for every run.

    Returns:
        One row per value: ``[0, error_rate, model_size, train_seconds]``.
        Slot 0 is never filled in by this function, and slot 2 holds the raw
        model size as returned by ``liblinear_util.get_model_size``.
    """
    trainer = exe_path.liblinar_train_exe_name
    tester = exe_path.liblinar_test_exe_name

    # make the result dir
    out_dir = './%s' % ds
    run_util.create_dir(out_dir)

    rows = []
    for c in l1_def.get_lambda_list(ds, 'liblinear'):
        model_path = out_dir + '/ll_model%g' % c
        predict_path = out_dir + '/ll_predict%g' % c
        result_path = out_dir + '/' + ori_result_file + '_%f' % c

        # clear the file if it already exists
        with open(result_path, 'w'):
            pass

        train_cmd = (trainer + ' -s 5 -c %f' % c
                     + ' %s %s' % (train_file, model_path))
        # the test executable's stdout is appended to the result file
        test_cmd = tester + ' %s %s %s>> %s' % (
            test_file, model_path, predict_path, result_path)

        # train, measuring wall-clock time around the external command
        print(train_cmd)
        started = time.time()
        os.system(train_cmd)
        train_seconds = time.time() - started

        # predict
        print(test_cmd)
        os.system(test_cmd)
        error_rate = liblinear_util.parse_error_rate(result_path)

        # The valid dimension of the training file used to be recomputed
        # here on every iteration without ever being read; that dead lookup
        # has been removed.
        model_size = liblinear_util.get_model_size(model_path)

        rows.append([0, error_rate, model_size, train_seconds])

    return rows
for opt in opt_list: rows = len(result_all[opt]) cols = len(result_all[opt][0]) for k in range(0,rows): for m in range(0,cols): result_all[opt][k][m] /= rand_num return result_all for ds in ds_list: path_list = dataset.get_file_name(ds) dst_folder = ds dst_folder = ds #os.system("mkdir %s" %dst_folder) run_util.create_dir(dst_folder) result_all = train_model(path_list, dst_folder) for key,val in result_all.iteritems(): #write the result to file parse_file = dst_folder +'/%s' %key + '.txt' run_util.write_parse_result(val,parse_file) opt_list_file = '%s' %dst_folder + os.sep + 'opt_list.txt' try: file_handle = open(opt_list_file,'w') for opt in opt_list: file_handle.write(opt + '.txt\n') except IOError as e:
def run_vw(train_file, test_file, ds, result_file, is_cache=True):
    """Train and test the VW executable once per L1 value configured for ``ds``.

    The libsvm inputs are converted with ``../tools/libsvm2vw.py`` when the
    corresponding ``<file>.vw`` does not exist yet.  Each command's stderr is
    redirected to a scratch file that ``vw_util.parse_error_rate`` reads.

    Args:
        train_file: libsvm training file; ``.vw`` is appended for the runs.
        test_file: libsvm test file; ``.vw`` is appended for the runs.
        ds: dataset name; working files go under ``<ds>/vw_tmp``.
        result_file: file name (inside that folder) that receives the
            collected rows via ``vw_util.write_parse_result``.
        is_cache: when equal to ``True``, ``--cache_file`` options are added
            to both commands.

    Returns:
        One row per L1 value:
        ``[train_error, test_error, sparsity, train_seconds]`` where
        ``sparsity`` is ``100 - model_size * 100.0 / valid_dim`` and the
        time is rounded to two decimals.
    """
    work_dir = ds + '/vw_tmp'
    run_util.create_dir(work_dir)

    model_path = work_dir + '/vw_model'
    readable_model_path = work_dir + '/vw_model.txt'
    log_path = work_dir + '/vw_tmp.txt'
    result_path = work_dir + '/' + result_file

    common_opts = ' --sgd --binary --loss_function=hinge '
    model_opts = (' --readable_model %s' % readable_model_path
                  + ' -f %s ' % model_path)

    valid_dim = run_util.get_valid_dim(train_file)

    # transform into vw format (training file first, then test file)
    for libsvm_path in (train_file, test_file):
        if not os.path.exists('%s.vw' % libsvm_path):
            os.system('python ../tools/libsvm2vw.py %s' % libsvm_path)

    train_vw = train_file + ".vw"
    test_vw = test_file + ".vw"

    train_prefix = '%s %s' % (exe_name, train_vw)
    test_prefix = '%s %s' % (exe_name, test_vw) + ' -t -i %s' % model_path
    # Deliberately an equality test rather than a truthiness test so that
    # only True (or 1) enables caching, exactly as before.
    if is_cache == True:
        train_prefix += ' --cache_file %s ' % (train_file + "_cache.vw")
        test_prefix += ' --cache_file %s ' % (test_file + "_cache.vw")

    stderr_redirect = ' 2> %s' % log_path

    rows = []
    for l1 in l1_def.get_lambda_list(ds, 'vw'):
        # train
        cmd = (train_prefix + ' --l1 %e' % l1 + common_opts + model_opts
               + stderr_redirect)
        print(cmd)
        started = time.time()
        os.system(cmd)
        elapsed = time.time() - started

        # keep two decimals of the learning time
        train_seconds = float('%.2f' % elapsed)

        # parse learn error rate
        train_error = vw_util.parse_error_rate(log_path)

        # test (overwrites the same scratch file)
        cmd = test_prefix + common_opts + stderr_redirect
        print(cmd)
        os.system(cmd)
        test_error = vw_util.parse_error_rate(log_path)

        # parse sparsity
        model_size = vw_util.get_model_size(readable_model_path)
        sparsity = 100 - (model_size * 100.0 / valid_dim)

        rows.append([train_error, test_error, sparsity, train_seconds])

    vw_util.write_parse_result(rows, result_path)
    return rows
def run_vw(train_file, test_file, ds, result_file, is_cache=True):
    """Sweep the VW executable over the L1 values configured for ``ds``.

    Inputs are converted with ``../tools/libsvm2vw.py`` whenever the matching
    ``<file>.vw`` is missing.  Every command's stderr is redirected to a
    scratch file that ``vw_util.parse_error_rate`` then reads.

    Args:
        train_file: libsvm training file; ``.vw`` is appended for the runs.
        test_file: libsvm test file; ``.vw`` is appended for the runs.
        ds: dataset name; working files go under ``<ds>/vw_tmp``.
        result_file: file name (inside that folder) that receives the
            collected rows via ``vw_util.write_parse_result``.
        is_cache: when equal to ``True``, ``--cache_file`` options are added
            to both commands.

    Returns:
        One row per L1 value:
        ``[train_error, test_error, sparsity, train_seconds]`` where
        ``sparsity`` is ``100 - model_size * 100.0 / valid_dim`` and the
        time is rounded to two decimals.
    """
    work_dir = ds + '/vw_tmp'
    run_util.create_dir(work_dir)

    model_path = work_dir + '/vw_model'
    readable_model_path = work_dir + '/vw_model.txt'
    log_path = work_dir + '/vw_tmp.txt'
    result_path = work_dir + '/' + result_file

    common_opts = ' --sgd --binary --loss_function=hinge '
    model_opts = (' --readable_model %s' % readable_model_path
                  + ' -f %s ' % model_path)

    valid_dim = run_util.get_valid_dim(train_file)

    # transform into vw format (training file first, then test file)
    for libsvm_path in (train_file, test_file):
        if not os.path.exists('%s.vw' % libsvm_path):
            os.system('python ../tools/libsvm2vw.py %s' % libsvm_path)

    train_vw = train_file + ".vw"
    test_vw = test_file + ".vw"

    train_prefix = '%s %s' % (exe_name, train_vw)
    test_prefix = '%s %s' % (exe_name, test_vw) + ' -t -i %s' % model_path
    # Deliberately an equality test rather than a truthiness test so that
    # only True (or 1) enables caching, exactly as before.
    if is_cache == True:
        train_prefix += ' --cache_file %s ' % (train_file + "_cache.vw")
        test_prefix += ' --cache_file %s ' % (test_file + "_cache.vw")

    stderr_redirect = ' 2> %s' % log_path

    rows = []
    for l1 in l1_def.get_lambda_list(ds, 'vw'):
        # train
        cmd = (train_prefix + ' --l1 %e' % l1 + common_opts + model_opts
               + stderr_redirect)
        print(cmd)
        started = time.time()
        os.system(cmd)
        elapsed = time.time() - started

        # keep two decimals of the learning time
        train_seconds = float('%.2f' % elapsed)

        # parse learn error rate
        train_error = vw_util.parse_error_rate(log_path)

        # test (overwrites the same scratch file)
        cmd = test_prefix + common_opts + stderr_redirect
        print(cmd)
        os.system(cmd)
        test_error = vw_util.parse_error_rate(log_path)

        # parse sparsity
        model_size = vw_util.get_model_size(readable_model_path)
        sparsity = 100 - (model_size * 100.0 / valid_dim)

        rows.append([train_error, test_error, sparsity, train_seconds])

    vw_util.write_parse_result(rows, result_path)
    return rows
def run_mRMR(train_file, test_file, ds, result_file):
    """Run mRMR feature selection followed by the SOL executable per budget.

    For each budget an mRMR model file is produced (unless it already exists
    under ``./<ds>``), parsed with ``parse_model_file``, and then handed to
    the SOL executable with ``-opt mRMR_OGD``; SOL's stdout is appended to
    ``result_file``.

    NOTE(review): the nesting of this function was reconstructed from a
    copy that had lost its indentation.  In particular, skipping selection
    *and* parsing when the model file exists should be confirmed.

    Args:
        train_file: libsvm training file; ``<train_file>.csv`` is created on
            demand through ``exe_path.csv_converter``.
        test_file: test file forwarded to ``dataset.get_cmd_data_by_file``.
        ds: dataset name; model files are written under ``./<ds>``.
        result_file: path that is truncated up front and then receives the
            output of every SOL run.

    Returns:
        One row per budget: ``[0, 0, budget, train_seconds]``.  The first
        two slots are never filled in here, and ``train_seconds`` stays 0
        when an existing model file is reused.
    """
    selector_exe = exe_path.mRMR

    # make the result dir
    out_dir = './%s' % ds
    run_util.create_dir(out_dir)

    valid_dim = run_util.get_valid_dim(train_file)
    # The sample count is not used below; the call itself is kept as-is.
    run_util.get_data_num(train_file)

    if 'synthetic' in ds:
        budgets = l1_def.get_lambda_list(ds, 'mRMR')
    else:
        candidates = [int(valid_dim * (1 - lam))
                      for lam in l1_def.get_lambda_list(ds, 'mRMR')]
        budgets = [dim for dim in candidates if 0 < dim <= 500]
    # NOTE(review): this unconditionally replaces the list computed above,
    # so the dimension-based budgets are never used.  It looks like a
    # leftover override -- confirm which list is intended.
    budgets = l1_def.get_lambda_list(ds, 'mRMR')

    # clear the file if it already exists
    with open(result_file, 'w'):
        pass

    rows = []
    for bs in budgets:
        row = [0, 0, 0, 0]
        model_path = out_dir + '/mRMR_model%d' % bs
        parse_path = out_dir + '/mRMR_model_parse%d' % bs

        if not os.path.exists(model_path):
            print(model_path + ' not exist')

            csv_path = train_file + '.csv'
            if not os.path.exists(csv_path):
                # convert data
                print('convert data')
                cmd = (exe_path.csv_converter
                       + ' -i %s' % train_file + ' -o %s' % csv_path)
                cmd += ' -sdt libsvm -ddt csv'
                print(cmd)
                os.system(cmd)

            # run mRMR, capturing its stdout as the model file
            cmd = (selector_exe + ' -v %d' % valid_dim
                   + ' -t 0.5 -i %s' % csv_path
                   + ' -n %d' % bs + ' > %s' % model_path)
            print(cmd)
            started = time.time()
            os.system(cmd)
            # learning time is the wall-clock time of the mRMR command
            row[3] = time.time() - started

            # parse result
            parse_model_file(model_path, parse_path)

        # run OGD
        cmd = (exe_path.SOL_exe_name
               + dataset.get_cmd_data_by_file(train_file, test_file, True)
               + ' -m %s' % parse_path + ' -k %d' % bs)
        cmd += dataset.get_model_param(ds, 'SGD-FS')
        cmd += ' -opt mRMR_OGD -norm -loss Hinge >> %s' % result_file
        print(cmd)
        os.system(cmd)

        row[2] = bs
        rows.append(row)

    return rows