def _remove_quietly(path):
    """Delete ``path`` without raising, like ``rm -f`` but without a shell.

    Args:
        path: file to delete.
    """
    import errno  # local import: only this helper needs it

    try:
        os.remove(path)
    except OSError as err:
        # A missing file is the normal case (e.g. no cache was written);
        # any other failure is reported but must not abort the experiment.
        if err.errno != errno.ENOENT:
            print('warning: cannot remove %s: %s' % (path, err))


def train_model(path_list, dst_folder):
    """Run every optimizer / multi-pass combination and average the results.

    The experiment is configured by module-level names read here:
    ``rand_num``, ``run_time``, ``opt_list``, ``mp_list``, ``is_cache``,
    ``extra_cmd``, ``is_default_param`` and ``ds``.

    For each of ``rand_num`` passes the training file is (optionally)
    shuffled, then every ``opt`` in ``opt_list`` is run with every multi-pass
    method in ``mp_list`` for up to ``run_time`` rounds via
    ``run_mpol.run_mpol``. Each run's output is parsed with
    ``run_util.parse_result`` and accumulated with ``add_to_dict``.

    Args:
        path_list: sequence whose first item is the training file and whose
            second item is the test file.
        dst_folder: directory that receives the raw and parsed result files.

    Returns:
        The structure built by ``add_to_dict``, indexed as
        ``result_all[opt][mp][row][col]``, with every cell divided by the
        number of runs that were accumulated into it.
    """
    train_file = path_list[0]
    test_file = path_list[1]
    result_all = {}

    # A scratch "_rand" copy is only used when several shuffles are requested;
    # otherwise the training file itself is used (and never deleted below).
    shuffling = rand_num > 1
    rand_file = (train_file + '_rand') if shuffling else train_file
    rand_file_cache = rand_file + '_cache'

    banner = '-----------------------------------'

    for k in range(rand_num):
        if shuffling:
            print('shuffle datset...')
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' % mp_method
                for m in range(run_time):
                    print(banner)
                    print(' Experiment on %s Random %d Multi-Pass %s Round %d'
                          % (opt, k, mp_method, m))
                    print(banner)

                    result_file = '%s/%s_rand_%d_mp_%s_round_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)

                    cmd = cmd_data + cmd_mp + extra_cmd
                    if not is_default_param:
                        cmd += dataset.get_model_param(ds, opt)

                    run_mpol.run_mpol(opt, result_file, ds, cmd)

                    print('\nparsing result...')
                    # write the result to file
                    parse_file = '%s/%s_%d_%s_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)
                    result_once = run_util.parse_result(result_file, parse_file)
                    result_all = add_to_dict(opt, mp_method, result_all,
                                             result_once)

                    # Without multi-pass a single round is run per shuffle;
                    # the averaging below relies on this.
                    if mp_method == 'none':
                        break

        # Remove the shuffled copy and its cache so the next shuffle starts
        # clean. NOTE(review): placement at the end of each shuffle iteration
        # is reconstructed from a collapsed source layout - confirm.
        if shuffling:
            _remove_quietly(rand_file_cache)
            _remove_quietly(rand_file)

    # average the result
    for opt in opt_list:
        for mp in mp_list:
            table = result_all[opt][mp]
            rows = len(table)
            cols = len(table[0])
            divid = rand_num
            if mp != 'none':
                divid *= run_time
            # Float divisor: avoids floor division of integer cells on
            # Python 2.
            divisor = float(divid)
            for r in range(rows):
                for c in range(cols):
                    table[r][c] /= divisor

    return result_all
def _remove_quietly(path):
    """Delete ``path`` without raising, like ``rm -f`` but without a shell.

    Args:
        path: file to delete.
    """
    import errno  # local import: only this helper needs it

    try:
        os.remove(path)
    except OSError as err:
        # A missing file is the normal case (e.g. no cache was written);
        # any other failure is reported but must not abort the experiment.
        if err.errno != errno.ENOENT:
            print('warning: cannot remove %s: %s' % (path, err))


def train_model(path_list, dst_folder):
    """Run every algorithm in ``opt_list`` and average the parsed results.

    The experiment is configured by module-level names read here:
    ``rand_num``, ``opt_list``, ``is_cache``, ``extra_cmd``,
    ``is_default_param`` and ``ds``.

    For each of ``rand_num`` passes the training file is (optionally)
    shuffled and every ``opt`` is executed:

    * ``'vw'``, ``'liblinear'`` and ``'fgm'`` go through their dedicated
      runner modules, whose return value is used directly;
    * ``'mRMR'`` goes through ``run_mRMR`` and its return value is then
      merged with what ``run_util.parse_result`` extracts from the result
      file;
    * anything else is launched with ``run_experiment.run_experiment`` and
      read back with ``run_util.parse_result``.

    Args:
        path_list: sequence whose first item is the training file and whose
            second item is the test file.
        dst_folder: directory that receives the parsed result files (and the
            raw result files of the ``mRMR`` and default branches).

    Returns:
        The structure built by ``add_to_dict``, indexed as
        ``result_all[opt][row][col]``, with every cell divided by
        ``rand_num``.
    """
    train_file = path_list[0]
    test_file = path_list[1]
    result_all = {}

    # A scratch "_rand" copy is only used when several shuffles are requested;
    # otherwise the training file itself is used (and never deleted below).
    shuffling = rand_num > 1
    rand_file = (train_file + '_rand') if shuffling else train_file
    rand_file_cache = rand_file + '_cache'

    banner = '-----------------------------------'

    for k in range(rand_num):
        if shuffling:
            print('shuffle datset...')
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            print(banner)
            print(' Experiment on %s Random %d' % (opt, k))
            print(banner)

            if opt == 'vw':
                # NOTE(review): this result file is relative to the current
                # working directory, not dst_folder - confirm that is intended.
                result_file = 'vw_result_%d.txt' % k
                result_once = run_vw.run_vw(rand_file, test_file, ds,
                                            result_file, is_cache)
            elif opt == 'liblinear':
                result_file = 'liblinear_result_%d.txt' % k
                result_once = run_liblinear.run_liblinear(rand_file, test_file,
                                                          ds, result_file)
            elif opt == 'fgm':
                result_file = 'fgm_result_%d.txt' % k
                result_once = run_fgm.run_fgm(rand_file, test_file, ds,
                                              result_file)
            elif opt == 'mRMR':
                result_file = '%s/%s_result_%d.txt' % (dst_folder, opt, k)
                result_once = run_mRMR.run_mRMR(rand_file, test_file, ds,
                                                result_file)

                print('\nparsing result...')
                # write the result to file
                parse_file = '%s/%s_%d.txt' % (dst_folder, opt, k)
                result_once2 = run_util.parse_result(result_file, parse_file)

                if len(result_once) != len(result_once2):
                    print('inconsistent parsing result')

                # Merge only the rows both sources provide, so a short parse
                # result is reported above instead of raising IndexError.
                for direct_row, parsed_row in zip(result_once, result_once2):
                    # Columns 0 and 1 always come from the parsed file;
                    # columns 2 and 3 only when the runner reported 0.
                    direct_row[0] = parsed_row[0]
                    direct_row[1] = parsed_row[1]
                    for col in (2, 3):
                        if direct_row[col] == 0:
                            direct_row[col] = parsed_row[col]
            else:
                result_file = '%s/%s_result_%d.txt' % (dst_folder, opt, k)

                cmd = cmd_data + extra_cmd
                if not is_default_param:
                    cmd += dataset.get_model_param(ds, opt)

                run_experiment.run_experiment(opt, result_file, ds, cmd)

                print('\nparsing result...')
                # write the result to file
                parse_file = '%s/%s_%d.txt' % (dst_folder, opt, k)
                result_once = run_util.parse_result(result_file, parse_file)

            result_all = add_to_dict(opt, result_all, result_once)

        # Remove the shuffled copy and its cache so the next shuffle starts
        # clean. NOTE(review): placement at the end of each shuffle iteration
        # is reconstructed from a collapsed source layout - confirm.
        if shuffling:
            _remove_quietly(rand_file_cache)
            _remove_quietly(rand_file)

    # average the result
    # Float divisor: avoids floor division of integer cells on Python 2.
    divisor = float(rand_num)
    for opt in opt_list:
        table = result_all[opt]
        rows = len(table)
        cols = len(table[0])
        for r in range(rows):
            for c in range(cols):
                table[r][c] /= divisor

    return result_all
def _remove_quietly(path):
    """Delete ``path`` without raising, like ``rm -f`` but without a shell.

    Args:
        path: file to delete.
    """
    import errno  # local import: only this helper needs it

    try:
        os.remove(path)
    except OSError as err:
        # A missing file is the normal case (e.g. no cache was written);
        # any other failure is reported but must not abort the experiment.
        if err.errno != errno.ENOENT:
            print('warning: cannot remove %s: %s' % (path, err))


def train_model(path_list, dst_folder):
    """Run every optimizer / multi-pass combination and average the results.

    The experiment is configured by module-level names read here:
    ``rand_num``, ``run_time``, ``opt_list``, ``mp_list``, ``is_cache``,
    ``extra_cmd``, ``is_default_param`` and ``ds``.

    For each of ``rand_num`` passes the training file is (optionally)
    shuffled, then every ``opt`` in ``opt_list`` is run with every multi-pass
    method in ``mp_list`` for up to ``run_time`` rounds via
    ``run_mpol.run_mpol``. Each run's output is parsed with
    ``run_util.parse_result`` and accumulated with ``add_to_dict``.

    Args:
        path_list: sequence whose first item is the training file and whose
            second item is the test file.
        dst_folder: directory that receives the raw and parsed result files.

    Returns:
        The structure built by ``add_to_dict``, indexed as
        ``result_all[opt][mp][row][col]``, with every cell divided by the
        number of runs that were accumulated into it.
    """
    train_file = path_list[0]
    test_file = path_list[1]
    result_all = {}

    # A scratch "_rand" copy is only used when several shuffles are requested;
    # otherwise the training file itself is used (and never deleted below).
    shuffling = rand_num > 1
    rand_file = (train_file + '_rand') if shuffling else train_file
    rand_file_cache = rand_file + '_cache'

    banner = '-----------------------------------'

    for k in range(rand_num):
        if shuffling:
            print('shuffle datset...')
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' % mp_method
                for m in range(run_time):
                    print(banner)
                    print(' Experiment on %s Random %d Multi-Pass %s Round %d'
                          % (opt, k, mp_method, m))
                    print(banner)

                    result_file = '%s/%s_rand_%d_mp_%s_round_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)

                    cmd = cmd_data + cmd_mp + extra_cmd
                    if not is_default_param:
                        cmd += dataset.get_model_param(ds, opt)

                    run_mpol.run_mpol(opt, result_file, ds, cmd)

                    print('\nparsing result...')
                    # write the result to file
                    parse_file = '%s/%s_%d_%s_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)
                    result_once = run_util.parse_result(result_file, parse_file)
                    result_all = add_to_dict(opt, mp_method, result_all,
                                             result_once)

                    # Without multi-pass a single round is run per shuffle;
                    # the averaging below relies on this.
                    if mp_method == 'none':
                        break

        # Remove the shuffled copy and its cache so the next shuffle starts
        # clean. NOTE(review): placement at the end of each shuffle iteration
        # is reconstructed from a collapsed source layout - confirm.
        if shuffling:
            _remove_quietly(rand_file_cache)
            _remove_quietly(rand_file)

    # average the result
    for opt in opt_list:
        for mp in mp_list:
            table = result_all[opt][mp]
            rows = len(table)
            cols = len(table[0])
            divid = rand_num
            if mp != 'none':
                divid *= run_time
            # Float divisor: avoids floor division of integer cells on
            # Python 2.
            divisor = float(divid)
            for r in range(rows):
                for c in range(cols):
                    table[r][c] /= divisor

    return result_all