Ejemplo n.º 1
0
def run_experiment(opt_name,result_file, dataset, extra_cmd):
    """Run the executable once per feature budget, appending output to result_file.

    opt_name    -- optimizer name, passed to the executable as '-opt'
    result_file -- receives the stdout of every run; it is truncated first
    dataset     -- data set name; when it contains 'synthetic' the values
                   returned by l1_def.get_lambda_list are used directly as
                   '-k' arguments
    extra_cmd   -- extra command line text inserted after the executable
                   name; for non-synthetic data sets it must contain
                   '-i <train_file>'

    Prints a message and exits the process with status 1 when no input
    file can be found in extra_cmd.
    """
    #clear the file if it already exists
    open(result_file,'w').close()

    #evaluate the result
    cmd_prefix = run_util.exe_name + extra_cmd + ' -opt %s' %opt_name
    cmd_postfix = ' >> %s' %result_file

    if 'synthetic' in dataset:
        bs_list = l1_def.get_lambda_list(dataset, opt_name)
    else:
        temp_list = extra_cmd.split()
        train_file = ''
        #stop one token short of the end: a trailing '-i' has no value and
        #must be reported as a missing input file, not raise IndexError
        for k in range(0,len(temp_list) - 1):
            if temp_list[k] == '-i':
                train_file = temp_list[k+1]
                break
        if train_file == '':
            print('no input file is specified!')
            #non-zero status so calling scripts can detect the failure
            sys.exit(1)
        data_valid_dim = run_util.get_valid_dim(train_file)
        lambda_list = l1_def.get_lambda_list(dataset, opt_name)

        bs_list = []
        for lam in lambda_list:
            #truncate before the positivity test: '%d' below drops the
            #fraction anyway, so a value such as 0.5 would otherwise be
            #emitted as '-k 0'
            dim = int(data_valid_dim * (1 - lam))
            if dim > 0:
                bs_list.append(dim)

    for bs in bs_list:
        cmd = cmd_prefix + ' -k %d' %bs + cmd_postfix
        print(cmd)
        os.system(cmd)
Ejemplo n.º 2
0
def run_experiment(opt_name, result_file, dataset, extra_cmd):
    """Launch the executable for every feature budget of one optimizer.

    The result file is emptied up front and each run appends its stdout to
    it through a shell '>>' redirection.

    Args:
        opt_name: optimizer name, forwarded as '-opt'.
        result_file: path of the file collecting the output of all runs.
        dataset: data set name. If it contains 'synthetic', the values from
            l1_def.get_lambda_list are passed to '-k' unchanged; otherwise
            each value lam is turned into int(valid_dim * (1 - lam)).
        extra_cmd: extra command line text placed right after the
            executable name. For non-synthetic data sets it must contain
            '-i <train_file>'.

    Exits the process with status 1 if extra_cmd carries no usable
    '-i <train_file>' pair.
    """
    #clear the file if it already exists
    open(result_file, 'w').close()

    #evaluate the result
    cmd_prefix = run_util.exe_name + extra_cmd + ' -opt %s' % opt_name
    cmd_postfix = ' >> %s' % result_file

    if 'synthetic' in dataset:
        bs_list = l1_def.get_lambda_list(dataset, opt_name)
    else:
        tokens = extra_cmd.split()
        train_file = ''
        #only look at '-i' flags that are followed by a value; a dangling
        #'-i' at the very end used to raise IndexError
        if '-i' in tokens[:-1]:
            train_file = tokens[tokens.index('-i') + 1]
        if train_file == '':
            print('no input file is specified!')
            #signal the failure to the caller instead of exiting with 0
            sys.exit(1)
        data_valid_dim = run_util.get_valid_dim(train_file)
        lambda_list = l1_def.get_lambda_list(dataset, opt_name)

        #convert to int before filtering: '%d' truncates when the command
        #is formatted, so fractional budgets below 1 would become '-k 0'
        dims = [int(data_valid_dim * (1 - lam)) for lam in lambda_list]
        bs_list = [dim for dim in dims if dim > 0]

    for bs in bs_list:
        cmd = cmd_prefix + ' -k %d' % bs + cmd_postfix
        print(cmd)
        os.system(cmd)
Ejemplo n.º 3
0
def run_fgm(train_file, test_file,ds, ori_result_file):
    """Train and test FGM once per feature budget and collect the results.

    train_file      -- training data, passed to the train executable
    test_file       -- test data, passed to the test executable
    ds              -- data set name; also the name of the output folder
    ori_result_file -- base name of the per-budget result files

    Returns a list with one [0, error_rate, budget, train_seconds] entry
    per budget, where error_rate comes from
    liblinear_util.parse_error_rate and train_seconds is the wall-clock
    time of the training command.
    """
    trainer = exe_path.fgm_train
    tester = exe_path.fgm_test

    #make the result dir
    out_dir = './%s' %ds
    run_util.create_dir(out_dir)

    if 'synthetic' not in ds:
        #derive the budgets from the valid dimension of the training data
        valid_dim = run_util.get_valid_dim(train_file)
        candidates = [int(valid_dim * (1 - lam))
                      for lam in l1_def.get_lambda_list(ds,'fgm')]
        budgets = [dim for dim in candidates if dim > 0]
    else:
        #synthetic data sets list their budgets directly
        budgets = l1_def.get_lambda_list(ds,'fgm')

    collected = []
    for bs in budgets:
        model_file = '%s/fgm_model%g' %(out_dir, bs)
        predict_file = '%s/fgm_predict%g' %(out_dir, bs)
        result_file = '%s/%s_%d' %(out_dir, ori_result_file, bs)

        #clear the file if it already exists
        open(result_file,'w').close()

        train_cmd = '%s -s 12 -c 10 -B %d %s %s' %(trainer, bs, train_file, model_file)
        test_cmd = '%s %s %s %s>> %s' %(tester, test_file, model_file, predict_file, result_file)

        #train and time the run
        print(train_cmd)
        began = time.time()
        os.system(train_cmd)
        train_time = float(time.time() - began)

        #predict
        print(test_cmd)
        os.system(test_cmd)
        error_rate = liblinear_util.parse_error_rate(result_file)

        collected.append([0, error_rate, bs, train_time])
    return collected
Ejemplo n.º 4
0
def run_fgm(train_file, test_file, ds, ori_result_file):
    """Run the FGM train/test executables for every feature budget.

    Args:
        train_file: training data handed to the train executable.
        test_file: test data handed to the test executable.
        ds: data set name; results are written below './<ds>'.
        ori_result_file: prefix of the result file written for each budget.

    Returns:
        A list of four-element lists, one per budget: slot 0 is left at 0,
        slot 1 holds the value from liblinear_util.parse_error_rate, slot 2
        the budget and slot 3 the wall-clock training time in seconds.
    """
    train_exe_name = exe_path.fgm_train
    test_exe_name = exe_path.fgm_test

    #make the result dir
    dst_folder = './%s' % ds
    run_util.create_dir(dst_folder)

    if 'synthetic' in ds:
        bs_list = l1_def.get_lambda_list(ds, 'fgm')
    else:
        data_valid_dim = run_util.get_valid_dim(train_file)
        bs_list = []
        for lam in l1_def.get_lambda_list(ds, 'fgm'):
            kept = int(data_valid_dim * (1 - lam))
            if kept <= 0:
                #nothing left to select for this setting
                continue
            bs_list.append(kept)

    result_all = []
    for bs in bs_list:
        suffix = '%g' % bs
        model_file = dst_folder + '/fgm_model' + suffix
        predict_file = dst_folder + '/fgm_predict' + suffix
        result_file = dst_folder + '/' + ori_result_file + '_%d' % bs

        #clear the file if it already exists
        open(result_file, 'w').close()

        train_cmd = ''.join([train_exe_name, ' -s 12 -c 10 -B %d' % bs,
                             ' ', train_file, ' ', model_file])
        test_cmd = ''.join([test_exe_name, ' ', test_file, ' ', model_file,
                            ' ', predict_file, '>> ', result_file])

        #train, measuring the elapsed wall-clock time
        print(train_cmd)
        start_time = time.time()
        os.system(train_cmd)
        elapsed = float(time.time() - start_time)

        #predict
        print(test_cmd)
        os.system(test_cmd)

        result_once = [0, 0, 0, 0]
        result_once[1] = liblinear_util.parse_error_rate(result_file)
        result_once[2] = bs
        result_once[3] = elapsed
        result_all.append(result_once)
    return result_all
Ejemplo n.º 5
0
def run_experiment(opt_name,result_file, dataset, extra_cmd):
    """Run the executable once per value from l1_def.get_lambda_list.

    opt_name    -- optimizer name, passed to the executable as '-opt'
    result_file -- receives the stdout of every run; it is truncated first
    dataset     -- data set name used to look up the value list
    extra_cmd   -- extra command line text inserted after the executable
                   name; for 'AROW-FS', 'SGD-FS' and 'OFSGD' it must
                   contain '-i <train_file>'

    For 'AROW-FS', 'SGD-FS' and 'OFSGD' each value is multiplied by the
    valid dimension of the training file, truncated and passed as '-k'
    (results below 1 are skipped); every other optimizer gets the value
    as '-l1'.

    Prints a message and exits the process with status 1 when the input
    file is required but cannot be found in extra_cmd.
    """
    #clear the file if it already exists
    open(result_file,'w').close()

    #evaluate the result
    cmd_prefix = run_util.exe_name + extra_cmd + ' -opt %s' %opt_name
    cmd_postfix = ' >> %s' %result_file

    lambda_list = l1_def.get_lambda_list(dataset, opt_name)

    #decide once whether the optimizer takes a feature count ('-k')
    #instead of an l1 weight ('-l1')
    use_feature_count = opt_name in ('AROW-FS', 'SGD-FS', 'OFSGD')

    if use_feature_count:
        temp_list = extra_cmd.split()
        train_file = ''
        #stop one token short of the end: a trailing '-i' has no value and
        #must be reported as a missing input file, not raise IndexError
        for k in range(0,len(temp_list) - 1):
            if temp_list[k] == '-i':
                train_file = temp_list[k+1]
                break
        if train_file == '':
            print('no input file is specified!')
            #non-zero status so calling scripts can detect the failure
            sys.exit(1)
        data_valid_dim = run_util.get_valid_dim(train_file)

    for l1 in lambda_list:
        if use_feature_count:
            feature_num = int(l1 * data_valid_dim)
            if feature_num < 1:
                continue
            cmd = cmd_prefix + ' -k %d' %feature_num + cmd_postfix
        else:
            cmd = cmd_prefix + ' -l1 %e' %l1 + cmd_postfix
        print(cmd)
        os.system(cmd)
Ejemplo n.º 6
0
def run_liblinear(train_file, test_file,ds, ori_result_file):
    """Train and test liblinear once per C value and collect the results.

    train_file      -- training data, passed to the train executable
    test_file       -- test data, passed to the test executable
    ds              -- data set name; also the name of the output folder
    ori_result_file -- base name of the per-C result files

    Returns a list with one [0, error_rate, model_size, train_seconds]
    entry per C value. error_rate comes from
    liblinear_util.parse_error_rate, model_size from
    liblinear_util.get_model_size and train_seconds is the wall-clock time
    of the training command.
    """
    result_all = []

    train_exe_name = exe_path.liblinar_train_exe_name
    test_exe_name = exe_path.liblinar_test_exe_name

    #make the result dir
    dst_folder = './%s' %ds
    run_util.create_dir(dst_folder)

    c_list = l1_def.get_lambda_list(ds,'liblinear')

    for c in c_list:
        result_once = [0,0,0,0]
        model_file = dst_folder + '/ll_model%g' %c
        predict_file = dst_folder + '/ll_predict%g' %c

        result_file = dst_folder + '/' + ori_result_file + '_%f' %c
        #clear the file if it already exists
        open(result_file,'w').close()

        #evaluate the result
        train_cmd = train_exe_name + ' -s 5 -c %f' %c + ' %s' %train_file + ' %s' %model_file
        test_cmd = test_exe_name + ' %s' %test_file + ' %s' %model_file + ' %s' %predict_file + '>> %s' %result_file

        print(train_cmd)
        start_time = time.time()
        os.system(train_cmd)
        end_time = time.time()

        #parse learning time
        result_once[3] = float(end_time - start_time)

        #predict
        print(test_cmd)
        os.system(test_cmd)
        result_once[1] = liblinear_util.parse_error_rate(result_file)
        #NOTE(review): the previous version also called
        #run_util.get_valid_dim(train_file) here on every iteration and
        #never used the result; the call was dropped - confirm it has no
        #side effect that callers rely on
        result_once[2] = liblinear_util.get_model_size(model_file)

        result_all.append(result_once)
    return result_all
Ejemplo n.º 7
0
def run_vw(train_file, test_file, ds, result_file, is_cache=True):
    """Train and test with the vw executable once per l1 value.

    Args:
        train_file: training data; converted with ../tools/libsvm2vw.py
            when '<train_file>.vw' does not exist yet.
        test_file: test data, converted the same way.
        ds: data set name; working files go to '<ds>/vw_tmp'.
        result_file: file name (inside the working folder) handed to
            vw_util.write_parse_result.
        is_cache: when equal to True, '--cache_file' options are added to
            both commands.

    Returns:
        A list with one [train_error, test_error, sparsity, train_seconds]
        entry per l1 value, where sparsity is
        100 - model_size * 100.0 / valid_dim and train_seconds is rounded
        to two decimals.
    """
    work_dir = ds + '/vw_tmp'
    run_util.create_dir(work_dir)

    model_file = work_dir + '/vw_model'
    rd_model_file = work_dir + '/vw_model.txt'
    stderr_log = work_dir + '/vw_tmp.txt'
    result_file = work_dir + '/' + result_file

    valid_dim = run_util.get_valid_dim(train_file)

    #transform into vw format
    for source in (train_file, test_file):
        if not os.path.exists('%s.vw' % source):
            os.system('python ../tools/libsvm2vw.py %s' % source)

    #build the command prefixes
    train_cmd_prefix = '%s %s' % (exe_name, train_file + '.vw')
    test_cmd_prefix = '%s %s -t -i %s' % (exe_name, test_file + '.vw', model_file)
    #explicit comparison kept on purpose: only a value equal to True
    #enables caching
    if is_cache == True:
        train_cmd_prefix += ' --cache_file %s ' % (train_file + '_cache.vw')
        test_cmd_prefix += ' --cache_file %s ' % (test_file + '_cache.vw')

    #pieces shared by every command
    common_opts = ' --sgd --binary --loss_function=hinge '
    model_opts = ' --readable_model %s -f %s ' % (rd_model_file, model_file)
    redirect = ' 2> %s' % stderr_log

    rows = []
    for l1 in l1_def.get_lambda_list(ds, 'vw'):
        #train
        train_cmd = ''.join([train_cmd_prefix, ' --l1 %e' % l1,
                             common_opts, model_opts, redirect])
        print(train_cmd)
        began = time.time()
        os.system(train_cmd)
        elapsed = float(time.time() - began)
        #round the learning time to two decimals
        train_time = float('%.2f' % elapsed)

        #parse learn error rate
        train_error = vw_util.parse_error_rate(stderr_log)

        #test
        test_cmd = test_cmd_prefix + common_opts + redirect
        print(test_cmd)
        os.system(test_cmd)
        test_error = vw_util.parse_error_rate(stderr_log)

        #parse sparsity
        model_size = vw_util.get_model_size(rd_model_file)
        sparsity = 100 - (model_size * 100.0 / valid_dim)

        rows.append([train_error, test_error, sparsity, train_time])

    vw_util.write_parse_result(rows, result_file)
    return rows
Ejemplo n.º 8
0
Archivo: run_vw.py Proyecto: fgtlss/sol
def run_vw(train_file, test_file,ds,result_file, is_cache = True):
    """Run vw training and testing for every l1 value of a data set.

    train_file  -- training data; '<train_file>.vw' is generated with
                   ../tools/libsvm2vw.py if it does not exist
    test_file   -- test data, handled the same way
    ds          -- data set name; working files live in '<ds>/vw_tmp'
    result_file -- name of the summary file inside the working folder,
                   written through vw_util.write_parse_result
    is_cache    -- when equal to True, pass '--cache_file' to vw

    Returns a list of [train_error, test_error, sparsity, train_seconds]
    entries, one per l1 value.
    """
    tmp_folder = ds + '/vw_tmp'
    run_util.create_dir(tmp_folder)

    def in_tmp(name):
        #path of a working file inside the vw_tmp folder
        return tmp_folder + '/' + name

    model_file = in_tmp('vw_model')
    rd_model_file = in_tmp('vw_model.txt')
    tmp_file = in_tmp('vw_tmp.txt')
    result_file = in_tmp(result_file)

    valid_dim = run_util.get_valid_dim(train_file)

    #transform into vw format
    converter = 'python ../tools/libsvm2vw.py %s'
    if not os.path.exists('%s.vw' %train_file):
        os.system(converter %train_file)
    if not os.path.exists('%s.vw' %test_file):
        os.system(converter %test_file)

    #the cache names are derived from the original (pre-'.vw') file names
    use_cache = (is_cache == True)
    if use_cache:
        train_cache_opt = ' --cache_file %s ' %(train_file + "_cache.vw")
        test_cache_opt = ' --cache_file %s ' %(test_file + "_cache.vw")
    else:
        train_cache_opt = ''
        test_cache_opt = ''

    train_file += ".vw"
    test_file += ".vw"

    train_cmd_prefix = '%s' %exe_name + ' %s' %train_file + train_cache_opt
    test_cmd_prefix = '%s' %exe_name + ' %s' %test_file + ' -t -i %s' %model_file + test_cache_opt

    #fixed tails of the two commands; stderr of vw goes to tmp_file
    extra_cmd = ' --sgd --binary --loss_function=hinge '
    cmd_postfix = ' 2> %s' %tmp_file
    train_tail = extra_cmd + ' --readable_model %s' %rd_model_file + ' -f %s ' %model_file + cmd_postfix
    test_tail = extra_cmd + cmd_postfix

    result_list = []
    lambda_list = l1_def.get_lambda_list(ds,'vw')
    for l1 in lambda_list:
        #train
        cmd = train_cmd_prefix + ' --l1 %e' %l1 + train_tail
        print(cmd)
        start_time = time.time()
        os.system(cmd)
        end_time = time.time()
        #learning time, kept with two decimals
        seconds = float('%.2f' %float(end_time - start_time))

        #parse learn error rate
        learn_error = vw_util.parse_error_rate(tmp_file)

        #test
        cmd = test_cmd_prefix + test_tail
        print(cmd)
        os.system(cmd)
        test_error = vw_util.parse_error_rate(tmp_file)

        #parse sparsity
        model_size = vw_util.get_model_size(rd_model_file)
        sparsity = 100 - (model_size * 100.0 / valid_dim)

        result_list.append([learn_error, test_error, sparsity, seconds])

    vw_util.write_parse_result(result_list,result_file)
    return result_list
Ejemplo n.º 9
0
def run_mRMR(train_file, test_file,ds, result_file):
    """Select features with mRMR for every budget, then run the SOL executable.

    train_file  -- training data; converted to '<train_file>.csv' for mRMR
                   when that file does not exist yet
    test_file   -- test data, forwarded to dataset.get_cmd_data_by_file
    ds          -- data set name; mRMR outputs go to './<ds>'
    result_file -- receives the stdout of every SOL run; truncated first

    For each budget the mRMR executable is only run when its model file is
    missing; its output is then post-processed with parse_model_file.

    Returns a list with one [0, 0, budget, train_seconds] entry per budget.
    train_seconds is the wall-clock time of the mRMR command and stays 0
    when an existing model file is reused.
    """
    result_all = []

    train_exe_name = exe_path.mRMR

    #make the result dir
    dst_folder = './%s' %ds
    run_util.create_dir(dst_folder)

    data_valid_dim = run_util.get_valid_dim(train_file)

    #NOTE(review): the previous version first built a budget list that, for
    #non-synthetic data sets, was int(data_valid_dim * (1 - lambda)) limited
    #to the range 1..500, and then unconditionally overwrote it with the
    #line below, so that computation never had any effect. It was removed
    #together with an unused run_util.get_data_num(train_file) call; the
    #effective behavior is unchanged. Restore the derivation if the
    #overwrite was only a temporary experiment.
    bs_list = l1_def.get_lambda_list(ds,'mRMR')

    #clear the file if it already exists
    open(result_file,'w').close()

    for bs in bs_list:
        result_once = [0,0,0,0]
        model_file = dst_folder + '/mRMR_model%d' %bs
        parse_file = dst_folder + '/mRMR_model_parse%d' %bs

        if not os.path.exists(model_file):
            print(model_file + ' not exist')
            csv_train_file = train_file + '.csv'
            if not os.path.exists(csv_train_file):
                #convert data
                print('convert data')
                cmd = exe_path.csv_converter + ' -i %s' %train_file + ' -o %s' %csv_train_file
                cmd += ' -sdt libsvm -ddt csv'
                print(cmd)
                os.system(cmd)

            #run mRMR
            prev_cmd = train_exe_name + ' -v %d' %data_valid_dim + ' -t 0.5 -i %s' %csv_train_file
            cmd = prev_cmd + ' -n %d' %bs + ' > %s' %model_file
            print(cmd)
            start_time = time.time()
            os.system(cmd)
            end_time = time.time()

            #parse learning time
            result_once[3] = float(end_time - start_time)

            #parse result
            parse_model_file(model_file,parse_file)

        #run OGD
        cmd_data = dataset.get_cmd_data_by_file(train_file, test_file,True)
        cmd = exe_path.SOL_exe_name + cmd_data + ' -m %s' %parse_file + ' -k %d' %bs
        cmd += dataset.get_model_param(ds,'SGD-FS')
        cmd += ' -opt mRMR_OGD -norm -loss Hinge >> %s' %result_file

        print(cmd)
        os.system(cmd)

        result_once[2] = bs

        result_all.append(result_once)
    return result_all