import os

import dataset
import run_experiment
import run_fgm
import run_liblinear
import run_mRMR
import run_util
import run_vw
import sol_shuffle

# relies on module-level settings: rand_num, is_cache, opt_list, ds,
# extra_cmd, is_default_param

def train_model(path_list, dst_folder):
    train_file = path_list[0]
    test_file = path_list[1]
    result_all = {}

    # shuffle into a separate file when multiple random runs are requested
    if rand_num > 1:
        rand_file = train_file + '_rand'
    else:
        rand_file = train_file
    rand_file_cache = rand_file + '_cache'

    for k in range(0, rand_num):
        if rand_num > 1:
            print 'shuffle dataset...'
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            print '-----------------------------------'
            print ' Experiment on %s' % opt + ' Random %d' % k
            print '-----------------------------------'

            if opt == 'vw':
                result_file = 'vw_result_%d.txt' % k
                result_once = run_vw.run_vw(rand_file, test_file, ds,
                                            result_file, is_cache)
            elif opt == 'liblinear':
                result_file = 'liblinear_result_%d.txt' % k
                result_once = run_liblinear.run_liblinear(rand_file, test_file,
                                                          ds, result_file)
            elif opt == 'fgm':
                result_file = 'fgm_result_%d.txt' % k
                result_once = run_fgm.run_fgm(rand_file, test_file, ds,
                                              result_file)
            elif opt == 'mRMR':
                result_file = dst_folder + '/%s_result_%d.txt' % (opt, k)
                result_once = run_mRMR.run_mRMR(rand_file, test_file, ds,
                                                result_file)

                print '\nparsing result...'
                # write the parsed result to file and merge it with the
                # fields reported by the runner itself
                parse_file = dst_folder + '/%s_%d.txt' % (opt, k)
                result_once2 = run_util.parse_result(result_file, parse_file)
                bs_num = len(result_once)
                if bs_num != len(result_once2):
                    print 'inconsistent parsing result'
                for m in range(0, bs_num):
                    # fields 0 and 1 always come from the parsed output;
                    # fields 2 and 3 are kept from the runner unless unset
                    result_once[m][0] = result_once2[m][0]
                    result_once[m][1] = result_once2[m][1]
                    if result_once[m][2] == 0:
                        result_once[m][2] = result_once2[m][2]
                    if result_once[m][3] == 0:
                        result_once[m][3] = result_once2[m][3]
            else:
                result_file = dst_folder + '/%s_result_%d.txt' % (opt, k)
                cmd = cmd_data
                cmd += extra_cmd
                if not is_default_param:
                    cmd += dataset.get_model_param(ds, opt)
                run_experiment.run_experiment(opt, result_file, ds, cmd)

                print '\nparsing result...'
                # write the result to file
                parse_file = dst_folder + '/%s_%d.txt' % (opt, k)
                result_once = run_util.parse_result(result_file, parse_file)

            result_all = add_to_dict(opt, result_all, result_once)

        # remove the shuffled file and its cache from this round
        if rand_num > 1:
            os.system('rm -f %s' % rand_file_cache)
            os.system('rm -f %s' % rand_file)

    # average the results over the random runs
    for opt in opt_list:
        rows = len(result_all[opt])
        cols = len(result_all[opt][0])
        for k in range(0, rows):
            for m in range(0, cols):
                result_all[opt][k][m] /= rand_num
    return result_all
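# The add_to_dict helper used above is defined elsewhere in this script.
# A minimal sketch of what it might look like, assuming it element-wise
# accumulates one run's result matrix into the per-algorithm totals (the
# copy-on-first-run behavior is an assumption inferred from the final
# averaging loop, not the script's confirmed implementation):
def add_to_dict(opt, result_all, result_once):
    if opt not in result_all:
        # first run for this algorithm: copy so later runs can sum into it
        result_all[opt] = [[float(v) for v in row] for row in result_once]
    else:
        for i in range(0, len(result_once)):
            for j in range(0, len(result_once[i])):
                result_all[opt][i][j] += float(result_once[i][j])
    return result_all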
    # tail of the preceding function (body elided in this excerpt)
    return result_list

# k-fold cross validation: each split file serves once as the test set,
# and the remaining folds are concatenated into the training set.
# relies on module-level context: grid_size, fold_num, split_list,
# exe_cmd, and the run_one_data helper
result_item_num = 4
result_list_all = [[0 for y in range(0, result_item_num)]
                   for x in range(0, grid_size)]
for split_item in split_list:
    test_file = split_item
    train_file_list = filter(lambda x: x != split_item, split_list)
    train_file = test_file + '_train'
    os.system('rm -f %s' % train_file)
    for x in train_file_list:
        merge_cmd = 'cat %s >> %s' % (x, train_file)
        os.system(merge_cmd)

    cmd_data = dataset.get_cmd_data_by_file(train_file, test_file)
    result_list_one = run_one_data(exe_cmd + cmd_data)
    #raw_input('type to continue')

    # accumulate this fold's results
    for k in range(0, grid_size):
        for m in range(0, result_item_num):
            result_list_all[k][m] += float(result_list_one[k][m])

    # delete the temp files
    os.system('rm -f %s' % train_file)
    os.system('rm -f %s_cache' % train_file)

# average the results over the folds
for k in range(0, grid_size):
    for m in range(0, result_item_num):
        result_list_all[k][m] /= fold_num
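# The loop above assumes split_list already names one file per fold.
# A minimal sketch of how such files could be produced, assuming a
# round-robin line split; the '%s_split_%d' naming and the helper name
# split_dataset are hypothetical, not the script's actual splitter:
def split_dataset(data_file, fold_num):
    split_list = ['%s_split_%d' % (data_file, i) for i in range(0, fold_num)]
    out_files = [open(path, 'w') for path in split_list]
    with open(data_file, 'r') as fin:
        for idx, line in enumerate(fin):
            # send every fold_num-th sample to the same fold file
            out_files[idx % fold_num].write(line)
    for f in out_files:
        f.close()
    return split_list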
import os

import dataset
import run_mpol
import run_util
import sol_shuffle

# multi-pass variant: relies on module-level settings rand_num, is_cache,
# opt_list, mp_list, run_time, ds, extra_cmd, is_default_param

def train_model(path_list, dst_folder):
    train_file = path_list[0]
    test_file = path_list[1]
    result_all = {}

    # shuffle into a separate file when multiple random runs are requested
    if rand_num > 1:
        rand_file = train_file + '_rand'
    else:
        rand_file = train_file
    rand_file_cache = rand_file + '_cache'

    for k in range(0, rand_num):
        if rand_num > 1:
            print 'shuffle dataset...'
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' % mp_method
                for m in range(0, run_time):
                    print '-----------------------------------'
                    print ' Experiment on %s Random %d Multi-Pass %s Round %d' \
                        % (opt, k, mp_method, m)
                    print '-----------------------------------'

                    result_file = dst_folder + '/%s_rand_%d_mp_%s_round_%d.txt' \
                        % (opt, k, mp_method, m)
                    cmd = cmd_data
                    cmd += cmd_mp
                    cmd += extra_cmd
                    if not is_default_param:
                        cmd += dataset.get_model_param(ds, opt)
                    run_mpol.run_mpol(opt, result_file, ds, cmd)

                    print '\nparsing result...'
                    # write the result to file
                    parse_file = dst_folder + '/%s_%d_%s_%d.txt' \
                        % (opt, k, mp_method, m)
                    result_once = run_util.parse_result(result_file, parse_file)
                    result_all = add_to_dict(opt, mp_method, result_all,
                                             result_once)

                    # no multi-pass: one round is enough
                    if mp_method == 'none':
                        break

        # remove the shuffled file and its cache from this round
        if rand_num > 1:
            os.system('rm -f %s' % rand_file_cache)
            os.system('rm -f %s' % rand_file)

    # average: 'none' runs once per shuffle, the others run_time times
    for opt in opt_list:
        for mp in mp_list:
            rows = len(result_all[opt][mp])
            cols = len(result_all[opt][mp][0])
            divisor = rand_num
            if mp != 'none':
                divisor *= run_time
            for k in range(0, rows):
                for m in range(0, cols):
                    result_all[opt][mp][k][m] /= divisor
    return result_all
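# This multi-pass variant calls add_to_dict with an extra mp_method key.
# A minimal sketch under the same assumptions as the two-argument sketch
# earlier, nesting results by algorithm and then by multi-pass method:
def add_to_dict(opt, mp_method, result_all, result_once):
    if opt not in result_all:
        result_all[opt] = {}
    if mp_method not in result_all[opt]:
        result_all[opt][mp_method] = [[float(v) for v in row]
                                      for row in result_once]
    else:
        for i in range(0, len(result_once)):
            for j in range(0, len(result_once[i])):
                result_all[opt][mp_method][i][j] += float(result_once[i][j])
    return result_all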
import os
import time

import dataset
import exe_path
import l1_def
import run_util

def run_mRMR(train_file, test_file, ds, result_file):
    result_all = []
    train_exe_name = exe_path.mRMR

    # make the result dir
    dst_folder = './%s' % ds
    run_util.create_dir(dst_folder)

    data_valid_dim = run_util.get_valid_dim(train_file)
    data_num = run_util.get_data_num(train_file)

    if 'synthetic' in ds:
        bs_list = l1_def.get_lambda_list(ds, 'mRMR')
    else:
        # map each lambda to a candidate number of selected features,
        # keeping only counts in (0, 500]
        lambda_list = l1_def.get_lambda_list(ds, 'mRMR')
        bs_list = []
        b_num = len(lambda_list)
        for i in range(0, b_num):
            dim = int(data_valid_dim * (1 - lambda_list[i]))
            if dim > 0 and dim <= 500:
                bs_list.append(dim)

    # clear the result file if it already exists
    open(result_file, 'w').close()

    for bs in bs_list:
        result_once = [0, 0, 0, 0]
        model_file = dst_folder + '/mRMR_model%d' % bs
        parse_file = dst_folder + '/mRMR_model_parse%d' % bs
        if not os.path.exists(model_file):
            print model_file + ' does not exist'

        # the mRMR binary expects csv input, so convert from libsvm once
        csv_train_file = train_file + '.csv'
        if not os.path.exists(csv_train_file):
            print 'convert data'
            cmd = exe_path.csv_converter + ' -i %s' % train_file \
                + ' -o %s' % csv_train_file
            cmd += ' -sdt libsvm -ddt csv'
            print cmd
            os.system(cmd)

        # run mRMR feature selection, timing it as the training time
        prev_cmd = train_exe_name + ' -v %d' % data_valid_dim \
            + ' -t 0.5 -i %s' % csv_train_file
        cmd = prev_cmd + ' -n %d' % bs + ' > %s' % model_file
        print cmd
        start_time = time.time()
        os.system(cmd)
        end_time = time.time()
        train_time = float(end_time - start_time)
        result_once[3] = train_time

        # extract the selected features from the mRMR output
        parse_model_file(model_file, parse_file)

        # train and test OGD restricted to the selected features
        cmd_data = dataset.get_cmd_data_by_file(train_file, test_file, True)
        cmd = exe_path.SOL_exe_name + cmd_data + ' -m %s' % parse_file \
            + ' -k %d' % bs
        cmd += dataset.get_model_param(ds, 'SGD-FS')
        cmd += ' -opt mRMR_OGD -norm -loss Hinge >> %s' % result_file
        print cmd
        os.system(cmd)

        result_once[2] = bs
        result_all.append(result_once)

    return result_all
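# parse_model_file is called above but defined elsewhere. A heavily
# hedged sketch of what it might do: pull the selected feature indices
# out of the table that the mRMR binary prints to stdout and write one
# index per line for the learner's -m option. The 'mRMR feature' header,
# the order/index/name/score row layout, and the one-index-per-line model
# format are all assumptions about the binaries, not confirmed behavior:
def parse_model_file(model_file, parse_file):
    indices = []
    in_table = False
    with open(model_file, 'r') as fin:
        for line in fin:
            if 'mRMR feature' in line:
                in_table = True          # the selection table starts here
                continue
            if in_table:
                fields = line.split()
                if len(fields) >= 4 and fields[0].isdigit():
                    indices.append(fields[1])   # second column: feature index
                elif indices:
                    break                # past the end of the table
    with open(parse_file, 'w') as fout:
        fout.write('\n'.join(indices) + '\n')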