Esempio n. 1
0
def train_model(path_list, dst_folder):
    """Run every optimizer / multi-pass combination and average the results.

    Relies on module-level settings that are not defined in this function:
    ``rand_num``, ``run_time``, ``opt_list``, ``mp_list``, ``is_cache``,
    ``extra_cmd``, ``is_default_param`` and ``ds``.

    Args:
        path_list: sequence whose first item is the training file path and
            whose second item is the test file path.
        dst_folder: directory prefix used to build the raw and parsed
            result file paths.

    Returns:
        The dictionary built up through ``add_to_dict``, indexed as
        ``result_all[opt][mp_method][row][col]``, with every entry divided
        by the number of runs that contributed to it.
    """
    train_file = path_list[0]
    test_file = path_list[1]

    result_all = {}
    # A separate shuffled copy is only needed when several random passes run.
    if rand_num > 1:
        rand_file = train_file + '_rand'
    else:
        rand_file = train_file

    rand_file_cache = rand_file + '_cache'

    for k in range(rand_num):
        if rand_num > 1:
            print('shuffle datset...')
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' % mp_method
                for m in range(run_time):
                    print('-----------------------------------')
                    print(' Experiment on %s Random %d Multi-Pass %s Round %d'
                          % (opt, k, mp_method, m))
                    print('-----------------------------------')

                    result_file = '%s/%s_rand_%d_mp_%s_round_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)

                    cmd = cmd_data + cmd_mp + extra_cmd
                    if not is_default_param:
                        cmd += dataset.get_model_param(ds, opt)

                    run_mpol.run_mpol(opt, result_file, ds, cmd)

                    print('\nparsing result...')
                    # The parsed result goes to its own file next to the raw one.
                    parse_file = '%s/%s_%d_%s_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)

                    result_once = run_util.parse_result(result_file, parse_file)
                    result_all = add_to_dict(opt, mp_method, result_all,
                                             result_once)

                    # 'none' is executed once per shuffle only; the averaging
                    # below divides by rand_num alone for it.
                    if mp_method == 'none':
                        break

        # Remove the shuffled copy and its cache before the next pass.
        if rand_num > 1:
            for stale_path in (rand_file_cache, rand_file):
                try:
                    os.remove(stale_path)
                except OSError:
                    # Best-effort cleanup, like the former `rm -f`: a file
                    # that cannot be removed must not abort the experiment.
                    pass

    # Average the accumulated results.
    for opt in opt_list:
        for mp in mp_list:
            table = result_all[opt][mp]
            cols = len(table[0])

            divid = rand_num
            if mp != 'none':
                divid *= run_time
            # float() forces true division so integer entries are not
            # floor-divided under Python 2.
            divid = float(divid)
            for row in table:
                for col in range(cols):
                    row[col] /= divid

    return result_all
Esempio n. 2
0
def train_model(path_list, dst_folder):
    """Run every algorithm in ``opt_list`` on shuffled data and average results.

    Relies on module-level settings that are not defined in this function:
    ``rand_num``, ``opt_list``, ``is_cache``, ``extra_cmd``,
    ``is_default_param`` and ``ds``.

    The algorithms 'vw', 'liblinear', 'fgm' and 'mRMR' are run through their
    dedicated ``run_*`` modules; every other name goes through
    ``run_experiment`` followed by ``run_util.parse_result``.

    Args:
        path_list: sequence whose first item is the training file path and
            whose second item is the test file path.
        dst_folder: directory prefix used to build result file paths (the
            'vw', 'liblinear' and 'fgm' result files are named without it).

    Returns:
        The dictionary built up through ``add_to_dict``, indexed as
        ``result_all[opt][row][col]``, with every entry divided by
        ``rand_num``.
    """
    train_file = path_list[0]
    test_file = path_list[1]

    result_all = {}
    # A separate shuffled copy is only needed when several random passes run.
    if rand_num > 1:
        rand_file = train_file + '_rand'
    else:
        rand_file = train_file

    rand_file_cache = rand_file + '_cache'

    for k in range(rand_num):
        if rand_num > 1:
            print('shuffle datset...')
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            print('-----------------------------------')
            print(' Experiment on %s Random %d' % (opt, k))
            print('-----------------------------------')

            if opt == 'vw':
                result_file = 'vw_result_%d.txt' % k
                result_once = run_vw.run_vw(rand_file, test_file, ds,
                                            result_file, is_cache)
            elif opt == 'liblinear':
                result_file = 'liblinear_result_%d.txt' % k
                result_once = run_liblinear.run_liblinear(rand_file, test_file,
                                                          ds, result_file)
            elif opt == 'fgm':
                result_file = 'fgm_result_%d.txt' % k
                result_once = run_fgm.run_fgm(rand_file, test_file, ds,
                                              result_file)
            elif opt == 'mRMR':
                result_file = '%s/%s_result_%d.txt' % (dst_folder, opt, k)
                result_once = run_mRMR.run_mRMR(rand_file, test_file, ds,
                                                result_file)

                print('\nparsing result...')
                # The parsed result goes to its own file next to the raw one.
                parse_file = '%s/%s_%d.txt' % (dst_folder, opt, k)

                result_once2 = run_util.parse_result(result_file, parse_file)
                if len(result_once) != len(result_once2):
                    print('inconsistent parsing result')
                # zip() stops at the shorter list, so a length mismatch is
                # only reported above instead of raising IndexError here.
                for merged, parsed in zip(result_once, result_once2):
                    # Columns 0 and 1 always come from the parsed file;
                    # columns 2 and 3 only fill in values that are still 0.
                    merged[0] = parsed[0]
                    merged[1] = parsed[1]
                    if merged[2] == 0:
                        merged[2] = parsed[2]
                    if merged[3] == 0:
                        merged[3] = parsed[3]
            else:
                result_file = '%s/%s_result_%d.txt' % (dst_folder, opt, k)

                cmd = cmd_data + extra_cmd
                if not is_default_param:
                    cmd += dataset.get_model_param(ds, opt)

                run_experiment.run_experiment(opt, result_file, ds, cmd)

                print('\nparsing result...')
                # The parsed result goes to its own file next to the raw one.
                parse_file = '%s/%s_%d.txt' % (dst_folder, opt, k)

                result_once = run_util.parse_result(result_file, parse_file)
            result_all = add_to_dict(opt, result_all, result_once)

        # Remove the shuffled copy and its cache before the next pass.
        if rand_num > 1:
            for stale_path in (rand_file_cache, rand_file):
                try:
                    os.remove(stale_path)
                except OSError:
                    # Best-effort cleanup, like the former `rm -f`: a file
                    # that cannot be removed must not abort the experiment.
                    pass

    # Average the accumulated results.
    # float() forces true division so integer entries are not floor-divided
    # under Python 2.
    divid = float(rand_num)
    for opt in opt_list:
        table = result_all[opt]
        cols = len(table[0])

        for row in table:
            for col in range(cols):
                row[col] /= divid

    return result_all
Esempio n. 3
0
File: mpol.py Progetto: fgtlss/sol
def train_model(path_list, dst_folder):
    """Run each optimizer with each multi-pass method and average the results.

    Relies on module-level settings that are not defined in this function:
    ``rand_num``, ``run_time``, ``opt_list``, ``mp_list``, ``is_cache``,
    ``extra_cmd``, ``is_default_param`` and ``ds``.

    Args:
        path_list: sequence whose first item is the training file path and
            whose second item is the test file path.
        dst_folder: directory prefix used to build the raw and parsed
            result file paths.

    Returns:
        The dictionary built up through ``add_to_dict``, indexed as
        ``result_all[opt][mp_method][row][col]``, with every entry divided
        by the number of runs that contributed to it.
    """
    train_file = path_list[0]
    test_file = path_list[1]

    result_all = {}
    # A separate shuffled copy is only needed when several random passes run.
    if rand_num > 1:
        rand_file = train_file + '_rand'
    else:
        rand_file = train_file

    rand_file_cache = rand_file + '_cache'

    for k in range(rand_num):
        if rand_num > 1:
            print('shuffle datset...')
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' % mp_method
                for m in range(run_time):
                    print('-----------------------------------')
                    print(' Experiment on %s Random %d Multi-Pass %s Round %d'
                          % (opt, k, mp_method, m))
                    print('-----------------------------------')

                    result_file = '%s/%s_rand_%d_mp_%s_round_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)

                    cmd = cmd_data + cmd_mp + extra_cmd
                    if not is_default_param:
                        cmd += dataset.get_model_param(ds, opt)

                    run_mpol.run_mpol(opt, result_file, ds, cmd)

                    print('\nparsing result...')
                    # The parsed result goes to its own file next to the raw one.
                    parse_file = '%s/%s_%d_%s_%d.txt' % (
                        dst_folder, opt, k, mp_method, m)

                    result_once = run_util.parse_result(result_file, parse_file)
                    result_all = add_to_dict(opt, mp_method, result_all,
                                             result_once)

                    # 'none' is executed once per shuffle only; the averaging
                    # below divides by rand_num alone for it.
                    if mp_method == 'none':
                        break

        # Remove the shuffled copy and its cache before the next pass.
        if rand_num > 1:
            for stale_path in (rand_file_cache, rand_file):
                try:
                    os.remove(stale_path)
                except OSError:
                    # Best-effort cleanup, like the former `rm -f`: a file
                    # that cannot be removed must not abort the experiment.
                    pass

    # Average the accumulated results.
    for opt in opt_list:
        for mp in mp_list:
            table = result_all[opt][mp]
            cols = len(table[0])

            divid = rand_num
            if mp != 'none':
                divid *= run_time
            # float() forces true division so integer entries are not
            # floor-divided under Python 2.
            divid = float(divid)
            for row in table:
                for col in range(cols):
                    row[col] /= divid

    return result_all