Beispiel #1
0
def train_model(path_list,dst_folder):
    train_file = path_list[0]
    test_file = path_list[1]

    result_all = {}
    #random the file
    if rand_num > 1:
        rand_file = train_file + '_rand'  
    else:
	    rand_file = train_file

    rand_file_cache = rand_file + '_cache'

    for k in range(0,rand_num):
        if rand_num > 1:
            print 'shuffle datset...'
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file);

        for opt in opt_list:
            print '-----------------------------------'
            print ' Experiment on %s' %opt + ' Random %d' %k 
            print '-----------------------------------'

            if opt == 'vw':
                result_file = 'vw_result_%d' %k + '.txt'
                result_once = run_vw.run_vw(rand_file, test_file, ds, result_file, is_cache)
            elif  opt == 'liblinear':
                result_file = 'liblinear_result_%d' %k + '.txt'
                result_once = run_liblinear.run_liblinear(rand_file, test_file, ds, result_file)
            elif opt == 'fgm':
                result_file = 'fgm_result_%d' %k + '.txt'
                result_once = run_fgm.run_fgm(rand_file, test_file, ds, result_file)
            elif opt == 'mRMR':
                result_file = dst_folder + '/%s' %opt + '_result_%d' %k + '.txt'
                result_once = run_mRMR.run_mRMR(rand_file, test_file, ds, result_file)

                print '\nparsing result...'
                #write the result to file
                parse_file = dst_folder +'/%s' %opt + '_%d' %k + '.txt'

                result_once2 = run_util.parse_result(result_file, parse_file);
                bs_num = len(result_once)
                if bs_num != len(result_once2):
                    print 'inconsistent parsing result'
                for m in range(0,bs_num):
                    result_once[m][0] = result_once2[m][0]
                    result_once[m][1] = result_once2[m][1]
                    if result_once[m][2] == 0:
                        result_once[m][2] = result_once2[m][2]
                    if result_once[m][3] == 0:
                        result_once[m][3] = result_once2[m][3]
            else:
                result_file = dst_folder + '/%s' %opt + '_result_%d' %k + '.txt'

                cmd = cmd_data
                cmd += extra_cmd
                if is_default_param == False:
                    cmd += dataset.get_model_param(ds, opt)

                run_experiment.run_experiment(opt,result_file,ds, cmd)

                print '\nparsing result...'
                #write the result to file
                parse_file = dst_folder +'/%s' %opt + '_%d' %k + '.txt'

                result_once = run_util.parse_result(result_file, parse_file);
            result_all = add_to_dict(opt,result_all, result_once)

        #remove previous file
        if rand_num > 1:
            os.system('rm -f %s' %rand_file_cache)
            os.system('rm -f %s' %rand_file)


    #average the result
    for opt in opt_list:
        rows = len(result_all[opt])
        cols = len(result_all[opt][0])

        for k in range(0,rows):
            for m in range(0,cols):
                result_all[opt][k][m] /= rand_num

    return result_all 
Beispiel #2
0
def train_model(path_list,dst_folder):
    train_file = path_list[0]
    test_file = path_list[1]

    result_all = {}
    #random the file
    if rand_num > 1:
        rand_file = train_file + '_rand'  
    else:
	    rand_file = train_file

    rand_file_cache = rand_file + '_cache'

    for k in range(0,rand_num):
        if rand_num > 1:
            print 'shuffle datset...'
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file);

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' %mp_method 
                for m in range(0,run_time):
                    print '-----------------------------------'
                    print ' Experiment on %s' %opt + ' Random %d' %k  + ' Multi-Pass %s' %mp_method + ' Round %d' %m
                    print '-----------------------------------'

                    result_file = dst_folder + '/%s' %opt + '_rand_%d' %k + '_mp_%s' %mp_method + '_round_%d' %m + '.txt'

                    cmd = cmd_data
                    cmd += cmd_mp
                    cmd += extra_cmd
                    if is_default_param == False:
                        cmd += dataset.get_model_param(ds, opt)

                    run_mpol.run_mpol(opt,result_file,ds, cmd)

                    print '\nparsing result...'
                    #write the result to file
                    parse_file = dst_folder +'/%s' %opt + '_%d' %k + '_%s' %mp_method + '_%d' %m  + '.txt'

                    result_once = run_util.parse_result(result_file, parse_file);
                    result_all = add_to_dict(opt,mp_method,result_all, result_once)

                    if mp_method == 'none':
                        break

        #remove previous file
        if rand_num > 1:
            os.system('rm -f %s' %rand_file_cache)
            os.system('rm -f %s' %rand_file)


    #average the result
    for opt in opt_list:
        for mp in mp_list:
            rows = len(result_all[opt][mp])
            cols = len(result_all[opt][mp][0])

            divid = rand_num
            if mp != 'none':
                divid *= run_time
            for k in range(0,rows):
                for m in range(0,cols):
                    result_all[opt][mp][k][m] /= divid

    return result_all 
Beispiel #3
0
def train_model(path_list, dst_folder):
    train_file = path_list[0]
    test_file = path_list[1]

    result_all = {}
    #random the file
    if rand_num > 1:
        rand_file = train_file + '_rand'
    else:
        rand_file = train_file

    rand_file_cache = rand_file + '_cache'

    for k in range(0, rand_num):
        if rand_num > 1:
            print 'shuffle datset...'
            sol_shuffle.sol_shuffle(train_file, rand_file)

        cmd_data = dataset.get_cmd_data_by_file(rand_file, test_file, is_cache)
        dataset.analyze(rand_file)

        for opt in opt_list:
            for mp_method in mp_list:
                cmd_mp = ' -mpt %s ' % mp_method
                for m in range(0, run_time):
                    print '-----------------------------------'
                    print ' Experiment on %s' % opt + ' Random %d' % k + ' Multi-Pass %s' % mp_method + ' Round %d' % m
                    print '-----------------------------------'

                    result_file = dst_folder + '/%s' % opt + '_rand_%d' % k + '_mp_%s' % mp_method + '_round_%d' % m + '.txt'

                    cmd = cmd_data
                    cmd += cmd_mp
                    cmd += extra_cmd
                    if is_default_param == False:
                        cmd += dataset.get_model_param(ds, opt)

                    run_mpol.run_mpol(opt, result_file, ds, cmd)

                    print '\nparsing result...'
                    #write the result to file
                    parse_file = dst_folder + '/%s' % opt + '_%d' % k + '_%s' % mp_method + '_%d' % m + '.txt'

                    result_once = run_util.parse_result(
                        result_file, parse_file)
                    result_all = add_to_dict(opt, mp_method, result_all,
                                             result_once)

                    if mp_method == 'none':
                        break

        #remove previous file
        if rand_num > 1:
            os.system('rm -f %s' % rand_file_cache)
            os.system('rm -f %s' % rand_file)

    #average the result
    for opt in opt_list:
        for mp in mp_list:
            rows = len(result_all[opt][mp])
            cols = len(result_all[opt][mp][0])

            divid = rand_num
            if mp != 'none':
                divid *= run_time
            for k in range(0, rows):
                for m in range(0, cols):
                    result_all[opt][mp][k][m] /= divid

    return result_all
Beispiel #4
0
def run_mRMR(train_file, test_file,ds, result_file):
    result_all = []

    train_exe_name = exe_path.mRMR

    #make the result dir
    dst_folder = './%s' %ds
    run_util.create_dir(dst_folder)

    data_valid_dim = run_util.get_valid_dim(train_file)
    data_num = run_util.get_data_num(train_file)

    #bs_list = l1_def.get_lambda_list(ds,'mRMR')
    
    if 'synthetic' in ds:
        bs_list = l1_def.get_lambda_list(ds,'mRMR')
    else:
        lambda_list = l1_def.get_lambda_list(ds,'mRMR')

        bs_list = [] 
        b_num = len(lambda_list)
        for i in range(0,b_num):
            dim = int(data_valid_dim * (1 - lambda_list[i]))
            if dim > 0 and dim <= 500:
                bs_list.append(dim)

    bs_list = l1_def.get_lambda_list(ds,'mRMR')

    #clear the file if it already exists
    open(result_file,'w').close()

    for bs in bs_list:
        result_once = [0,0,0,0]
        model_file = dst_folder + '/mRMR_model%d' %bs
        parse_file = dst_folder + '/mRMR_model_parse%d' %bs 

        if os.path.exists(model_file) == False:
            print model_file + ' not exist'
            csv_train_file = train_file + '.csv'
            if os.path.exists(csv_train_file) == False:
                #convert data
                print 'convert data'
                cmd = exe_path.csv_converter + ' -i %s' %train_file + ' -o %s' %csv_train_file
                cmd += ' -sdt libsvm -ddt csv'
                print cmd
                os.system(cmd)

            #run mRMR
            prev_cmd = train_exe_name + ' -v %d' %data_valid_dim + ' -t 0.5 -i %s' %csv_train_file 
            cmd = prev_cmd + ' -n %d' %bs + ' > %s' %model_file
            print cmd
            start_time =time.time()
            os.system(cmd)
            end_time = time.time()

            #parse learning time
            train_time = (float)(end_time - start_time) 
            result_once[3] = train_time

            #parse result
            parse_model_file(model_file,parse_file);

        #run OGD
        cmd_data = dataset.get_cmd_data_by_file(train_file, test_file,True)
        cmd = exe_path.SOL_exe_name + cmd_data + ' -m %s' %parse_file + ' -k %d' %bs
        cmd += dataset.get_model_param(ds,'SGD-FS')
        cmd += ' -opt mRMR_OGD -norm -loss Hinge >> %s' %result_file

        print cmd
        os.system(cmd)

        result_once[2] = bs
        
        result_all.append(result_once)
    return result_all