Beispiel #1
0
def run_fgm(train_file, test_file, ds, ori_result_file):
    """Train and test FGM once per feature budget and collect the results.

    The budgets come from ``l1_def.get_lambda_list(ds, 'fgm')``.  When the
    dataset name contains 'synthetic' the list is used as-is; otherwise every
    entry ``v`` is turned into ``int(valid_dim * (1 - v))`` and only positive
    values are kept.  All files are written below ``./<ds>``; the output of
    the test executable is appended to ``<ori_result_file>_<budget>``.

    Returns a list with one ``[0, error_rate, budget, train_seconds]`` entry
    per budget, where ``error_rate`` is whatever
    ``liblinear_util.parse_error_rate`` extracts from the captured test
    output.  Slot 0 is never filled in here.
    """
    trainer = exe_path.fgm_train
    tester = exe_path.fgm_test

    # make the result dir
    out_dir = './%s' % ds
    run_util.create_dir(out_dir)

    if 'synthetic' in ds:
        budgets = l1_def.get_lambda_list(ds, 'fgm')
    else:
        valid_dim = run_util.get_valid_dim(train_file)
        budgets = []
        for ratio in l1_def.get_lambda_list(ds, 'fgm'):
            dim = int(valid_dim * (1 - ratio))
            if dim > 0:
                budgets.append(dim)

    collected = []
    for bs in budgets:
        model_file = out_dir + '/fgm_model%g' % bs
        predict_file = out_dir + '/fgm_predict%g' % bs
        result_file = out_dir + '/' + ori_result_file + '_%d' % bs

        # clear the file if it already exists
        with open(result_file, 'w'):
            pass

        train_cmd = ' '.join(
            [trainer + ' -s 12 -c 10 -B %d' % bs, train_file, model_file])
        test_cmd = ' '.join(
            [tester, test_file, model_file,
             predict_file + '>> %s' % result_file])

        # only the training command is timed
        print(train_cmd)
        started = time.time()
        os.system(train_cmd)
        train_seconds = float(time.time() - started)

        # predict; the test executable's stdout is appended to result_file
        print(test_cmd)
        os.system(test_cmd)
        error_rate = liblinear_util.parse_error_rate(result_file)

        collected.append([0, error_rate, bs, train_seconds])
    return collected
Beispiel #2
0
def run_fgm(train_file, test_file, ds, ori_result_file):
    """Run the FGM train/test executables for every configured budget.

    ``l1_def.get_lambda_list(ds, 'fgm')`` supplies the raw list.  For dataset
    names containing 'synthetic' it is used directly as the list of budgets;
    for every other dataset each entry ``v`` is mapped to
    ``int(valid_dim * (1 - v))`` and non-positive results are dropped.

    Files (model, predictions, captured test output) go to ``./<ds>``.

    Returns one ``[0, error_rate, budget, train_seconds]`` list per budget;
    the first slot is left at 0 by this function.
    """
    fgm_train = exe_path.fgm_train
    fgm_test = exe_path.fgm_test

    # make the result dir
    result_dir = './%s' % ds
    run_util.create_dir(result_dir)

    if 'synthetic' not in ds:
        valid_dim = run_util.get_valid_dim(train_file)
        scaled = [int(valid_dim * (1 - lam))
                  for lam in l1_def.get_lambda_list(ds, 'fgm')]
        bs_values = [dim for dim in scaled if dim > 0]
    else:
        bs_values = l1_def.get_lambda_list(ds, 'fgm')

    runs = []
    for bs in bs_values:
        row = [0, 0, bs, 0]
        model_path = result_dir + '/fgm_model%g' % bs
        predict_path = result_dir + '/fgm_predict%g' % bs
        output_path = result_dir + '/' + ori_result_file + '_%d' % bs

        # clear the file if it already exists
        handle = open(output_path, 'w')
        handle.close()

        # assemble the two shell commands piece by piece
        train_cmd = fgm_train + ' -s 12 -c 10 -B %d' % bs
        train_cmd += ' %s' % train_file
        train_cmd += ' %s' % model_path

        test_cmd = fgm_test + ' %s' % test_file
        test_cmd += ' %s' % model_path
        test_cmd += ' %s' % predict_path
        test_cmd += '>> %s' % output_path

        print(train_cmd)
        t_begin = time.time()
        os.system(train_cmd)
        t_end = time.time()

        # learning time is the wall-clock duration of the training command
        row[3] = float(t_end - t_begin)

        # predict
        print(test_cmd)
        os.system(test_cmd)
        row[1] = liblinear_util.parse_error_rate(output_path)

        runs.append(row)
    return runs
Beispiel #3
0
def run_liblinear(train_file, test_file, ds, result_file):
    """Train liblinear once on ``train_file`` and evaluate it on ``test_file``.

    The model, the prediction file and the captured test output are placed in
    ``./<ds>/liblinear``.

    Returns a one-element list holding
    ``[0, error_rate, sparsity, train_seconds]`` where ``error_rate`` is what
    ``liblinear_util.parse_error_rate`` reads from the captured test output
    and ``sparsity`` is ``100 - model_size * 100.0 / valid_dim``.  Slot 0 is
    left at 0.
    """
    os.system('mkdir ./tmp')

    out_dir = './%s' % ds
    work_dir = out_dir + '/liblinear'
    run_util.create_dir(work_dir)

    model_path = work_dir + '/model'
    predict_path = work_dir + '/tmp.txt'

    trainer = exe_path.liblinar_train_exe_name
    tester = exe_path.liblinar_test_exe_name

    # make the result dir
    run_util.create_dir(out_dir)

    # work_dir already begins with './'; the extra prefix is kept so the
    # resulting path string stays exactly as before
    result_file = './%s' % work_dir + '/' + result_file

    # clear the file if it already exists
    with open(result_file, 'w'):
        pass

    train_cmd = ' '.join([trainer, train_file, model_path])
    test_cmd = ' '.join(
        [tester, test_file, model_path, predict_path + '>> %s' % result_file])

    # only the training command is timed
    print(train_cmd)
    started = time.time()
    os.system(train_cmd)
    train_seconds = float(time.time() - started)

    # predict
    print(test_cmd)
    os.system(test_cmd)
    error_rate = liblinear_util.parse_error_rate(result_file)

    valid_dim = run_util.get_valid_dim(train_file)
    model_size = liblinear_util.get_model_size(model_path)
    sparsity = 100 - (model_size * 100.0 / valid_dim)

    return [[0, error_rate, sparsity, train_seconds]]
Beispiel #4
0
def run_liblinear(train_file, test_file, ds, result_file):
    """Run one liblinear train/test cycle for dataset ``ds``.

    Everything produced by the run is stored under ``./<ds>/liblinear``: the
    model, the prediction file and the test executable's output, which is
    appended to ``result_file`` inside that folder.

    Returns ``[[0, error_rate, sparsity, train_seconds]]``.  ``sparsity`` is
    computed as ``100 - model_size * 100.0 / valid_dim``; the first slot is
    never written by this function.
    """
    row = [0, 0, 0, 0]
    os.system('mkdir ./tmp')

    ds_dir = './%s' % ds
    ll_dir = ds_dir + '/liblinear'
    run_util.create_dir(ll_dir)

    model_file = ll_dir + '/model'
    predict_file = ll_dir + '/tmp.txt'

    train_exe = exe_path.liblinar_train_exe_name
    test_exe = exe_path.liblinar_test_exe_name

    # make the result dir
    run_util.create_dir(ds_dir)

    # note: ll_dir already starts with './', so the path begins with '././'
    result_file = './%s' % ll_dir + '/' + result_file

    # clear the file if it already exists
    handle = open(result_file, 'w')
    handle.close()

    # assemble the two shell commands piece by piece
    train_cmd = train_exe + ' %s' % train_file
    train_cmd += ' %s' % model_file

    test_cmd = test_exe + ' %s' % test_file
    test_cmd += ' %s' % model_file
    test_cmd += ' %s' % predict_file
    test_cmd += '>> %s' % result_file

    print(train_cmd)
    t_begin = time.time()
    os.system(train_cmd)
    t_end = time.time()

    # learning time is the wall-clock duration of the training command
    row[3] = float(t_end - t_begin)

    # predict
    print(test_cmd)
    os.system(test_cmd)
    row[1] = liblinear_util.parse_error_rate(result_file)

    feature_count = run_util.get_valid_dim(train_file)
    nonzero_count = liblinear_util.get_model_size(model_file)
    row[2] = 100 - (nonzero_count * 100.0 / feature_count)

    return [row]
Beispiel #5
0
def run_liblinear(train_file, test_file, ds, ori_result_file):
    """Train and test liblinear once for every configured ``c`` value.

    The ``c`` values come from ``l1_def.get_lambda_list(ds, 'liblinear')``.
    Each run invokes the train executable with ``-s 5 -c <c>`` and writes its
    model, prediction file and captured test output below ``./<ds>``.

    Returns a list with one ``[0, error_rate, model_size, train_seconds]``
    entry per ``c``.  ``error_rate`` is what
    ``liblinear_util.parse_error_rate`` reads from the captured test output
    and ``model_size`` is the value of ``liblinear_util.get_model_size`` for
    the trained model.  Slot 0 is never filled in here.
    """
    train_exe_name = exe_path.liblinar_train_exe_name
    test_exe_name = exe_path.liblinar_test_exe_name

    # make the result dir
    dst_folder = './%s' % ds
    run_util.create_dir(dst_folder)

    result_all = []
    for c in l1_def.get_lambda_list(ds, 'liblinear'):
        model_file = dst_folder + '/ll_model%g' % c
        predict_file = dst_folder + '/ll_predict%g' % c
        result_file = dst_folder + '/' + ori_result_file + '_%f' % c

        # clear the file if it already exists
        with open(result_file, 'w'):
            pass

        train_cmd = (train_exe_name + ' -s 5 -c %f' % c
                     + ' %s' % train_file + ' %s' % model_file)
        test_cmd = (test_exe_name + ' %s' % test_file + ' %s' % model_file
                    + ' %s' % predict_file + '>> %s' % result_file)

        # only the training command is timed
        print(train_cmd)
        start_time = time.time()
        os.system(train_cmd)
        train_time = float(time.time() - start_time)

        # predict; the test executable's stdout is appended to result_file
        print(test_cmd)
        os.system(test_cmd)
        error_rate = liblinear_util.parse_error_rate(result_file)

        # The raw model size is reported here.  The previous per-iteration
        # run_util.get_valid_dim(train_file) call was dropped because its
        # result was never used.
        model_size = liblinear_util.get_model_size(model_file)

        result_all.append([0, error_rate, model_size, train_time])
    return result_all
Beispiel #6
0
    for opt in opt_list:
        rows = len(result_all[opt])
        cols = len(result_all[opt][0])

        for k in range(0,rows):
            for m in range(0,cols):
                result_all[opt][k][m] /= rand_num

    return result_all 

for ds in ds_list:
    path_list = dataset.get_file_name(ds)
    dst_folder = ds
    dst_folder = ds
    #os.system("mkdir %s" %dst_folder)
    run_util.create_dir(dst_folder)

    result_all = train_model(path_list, dst_folder)

    for key,val in result_all.iteritems():
        #write the result to file
        parse_file = dst_folder +'/%s' %key + '.txt'
        run_util.write_parse_result(val,parse_file)

    opt_list_file = '%s' %dst_folder + os.sep + 'opt_list.txt' 

    try:
        file_handle = open(opt_list_file,'w')
        for opt in opt_list:
            file_handle.write(opt + '.txt\n')
    except IOError as e:
Beispiel #7
0
def run_vw(train_file, test_file, ds, result_file, is_cache=True):
    """Train and test the vw executable once per configured ``--l1`` value.

    Working files are kept in ``<ds>/vw_tmp``.  If ``<file>.vw`` does not
    exist for the train or test file, ``../tools/libsvm2vw.py`` is run on it
    first.  When ``is_cache`` equals ``True`` both commands additionally get
    a ``--cache_file <file>_cache.vw`` option.

    Returns (and also hands to ``vw_util.write_parse_result``) a list with one
    ``[train_error, test_error, sparsity, train_seconds]`` entry per value
    from ``l1_def.get_lambda_list(ds, 'vw')``.  ``sparsity`` is
    ``100 - model_size * 100.0 / valid_dim`` and ``train_seconds`` is rounded
    to two decimals.
    """
    work_dir = ds + '/vw_tmp'
    run_util.create_dir(work_dir)

    model_file = work_dir + '/vw_model'
    readable_model = work_dir + '/vw_model.txt'
    stderr_file = work_dir + '/vw_tmp.txt'
    result_file = work_dir + '/' + result_file

    extra_cmd = ' --sgd --binary --loss_function=hinge '
    model_cmd = ' --readable_model %s' % readable_model + ' -f %s ' % model_file

    valid_dim = run_util.get_valid_dim(train_file)

    # transform into vw format
    for libsvm_path in (train_file, test_file):
        if not os.path.exists('%s.vw' % libsvm_path):
            os.system('python ../tools/libsvm2vw.py %s' % libsvm_path)

    vw_train = train_file + ".vw"
    vw_test = test_file + ".vw"

    train_prefix = '%s' % exe_name + ' %s' % vw_train
    test_prefix = '%s' % exe_name + ' %s' % vw_test + ' -t -i %s' % model_file
    # equality with True is kept on purpose: only values equal to True
    # enable the cache options
    if is_cache == True:
        train_prefix += ' --cache_file %s ' % (train_file + "_cache.vw")
        test_prefix += ' --cache_file %s ' % (test_file + "_cache.vw")

    # stderr of every vw invocation is redirected to stderr_file (overwritten
    # each time) and parsed afterwards
    cmd_postfix = ' 2> %s' % stderr_file
    test_cmd = test_prefix + extra_cmd + cmd_postfix

    result_list = []
    for l1 in l1_def.get_lambda_list(ds, 'vw'):
        # train
        train_cmd = (train_prefix + ' --l1 %e' % l1 + extra_cmd
                     + model_cmd + cmd_postfix)
        print(train_cmd)
        started = time.time()
        os.system(train_cmd)
        elapsed = float(time.time() - started)
        elapsed = float('%.2f' % elapsed)

        # learn error rate, read before the test run overwrites stderr_file
        train_error = vw_util.parse_error_rate(stderr_file)

        # test
        print(test_cmd)
        os.system(test_cmd)
        test_error = vw_util.parse_error_rate(stderr_file)

        # sparsity
        model_size = vw_util.get_model_size(readable_model)
        sparsity = 100 - (model_size * 100.0 / valid_dim)

        result_list.append([train_error, test_error, sparsity, elapsed])

    vw_util.write_parse_result(result_list, result_file)
    return result_list
Beispiel #8
0
def run_vw(train_file, test_file, ds, result_file, is_cache=True):
    """Evaluate vw on ``ds`` for every ``--l1`` value in the configured list.

    All intermediate files live in ``<ds>/vw_tmp``.  Inputs without a
    matching ``<file>.vw`` are first converted by running
    ``../tools/libsvm2vw.py`` on them.  ``is_cache`` (compared with ``True``)
    controls whether ``--cache_file <file>_cache.vw`` is passed to vw.

    For every value from ``l1_def.get_lambda_list(ds, 'vw')`` the result row
    is ``[train_error, test_error, sparsity, train_seconds]``; the rows are
    written through ``vw_util.write_parse_result`` and returned.
    """
    vw_dir = ds + '/vw_tmp'
    run_util.create_dir(vw_dir)

    model_path = vw_dir + '/vw_model'
    rd_model_path = vw_dir + '/vw_model.txt'
    log_path = vw_dir + '/vw_tmp.txt'
    result_file = vw_dir + '/' + result_file

    extra_cmd = ' --sgd --binary --loss_function=hinge '
    model_cmd = ' --readable_model %s' % rd_model_path
    model_cmd += ' -f %s ' % model_path

    feature_count = run_util.get_valid_dim(train_file)

    # transform into vw format
    if not os.path.exists('%s.vw' % train_file):
        os.system('python ../tools/libsvm2vw.py %s' % train_file)
    if not os.path.exists('%s.vw' % test_file):
        os.system('python ../tools/libsvm2vw.py %s' % test_file)

    # the comparison with True is intentional and mirrors the established
    # behaviour for non-boolean arguments
    use_cache = (is_cache == True)
    train_cache_opt = ''
    test_cache_opt = ''
    if use_cache:
        train_cache_opt = ' --cache_file %s ' % (train_file + "_cache.vw")
        test_cache_opt = ' --cache_file %s ' % (test_file + "_cache.vw")

    train_file += ".vw"
    test_file += ".vw"

    train_head = '%s' % exe_name + ' %s' % train_file + train_cache_opt
    test_head = ('%s' % exe_name + ' %s' % test_file
                 + ' -t -i %s' % model_path + test_cache_opt)

    # vw's stderr is captured in log_path and parsed after each command
    redirect = ' 2> %s' % log_path

    rows = []
    for l1 in l1_def.get_lambda_list(ds, 'vw'):
        row = [0, 0, 0, 0]

        # train
        cmd = train_head + ' --l1 %e' % l1 + extra_cmd + model_cmd + redirect
        print(cmd)
        t_begin = time.time()
        os.system(cmd)
        t_end = time.time()

        # learning time, rounded to two decimals
        row[3] = float('%.2f' % float(t_end - t_begin))

        # learn error rate
        row[0] = vw_util.parse_error_rate(log_path)

        # test
        cmd = test_head + extra_cmd + redirect
        print(cmd)
        os.system(cmd)
        row[1] = vw_util.parse_error_rate(log_path)

        # sparsity
        nonzero_count = vw_util.get_model_size(rd_model_path)
        row[2] = 100 - (nonzero_count * 100.0 / feature_count)

        rows.append(row)

    vw_util.write_parse_result(rows, result_file)
    return rows
Beispiel #9
0
def run_mRMR(train_file, test_file, ds, result_file):
    """Select features with mRMR and evaluate each selection with SOL.

    For every feature count ``bs`` in ``l1_def.get_lambda_list(ds, 'mRMR')``:

    * if ``./<ds>/mRMR_model<bs>`` does not exist yet, the training data is
      converted to CSV (only when ``<train_file>.csv`` is missing), the mRMR
      executable is run with its stdout redirected into that model file, and
      ``parse_model_file`` turns it into ``./<ds>/mRMR_model_parse<bs>``;
    * the SOL executable is then run with ``-opt mRMR_OGD`` and its output is
      appended to ``result_file`` (which is truncated once at the start).

    Returns one ``[0, 0, bs, train_seconds]`` list per feature count.
    ``train_seconds`` stays 0 when an existing model file was reused; the
    first two slots are never filled in by this function.
    """
    train_exe_name = exe_path.mRMR

    # make the result dir
    dst_folder = './%s' % ds
    run_util.create_dir(dst_folder)

    data_valid_dim = run_util.get_valid_dim(train_file)

    # The configured list is used directly as the list of feature counts.
    # An earlier synthetic/non-synthetic computation of bs_list was always
    # overwritten by this assignment, so that dead code has been removed.
    bs_list = l1_def.get_lambda_list(ds, 'mRMR')

    # clear the file if it already exists
    with open(result_file, 'w'):
        pass

    result_all = []
    for bs in bs_list:
        result_once = [0, 0, 0, 0]
        model_file = dst_folder + '/mRMR_model%d' % bs
        parse_file = dst_folder + '/mRMR_model_parse%d' % bs

        if not os.path.exists(model_file):
            print(model_file + ' not exist')
            csv_train_file = train_file + '.csv'
            if not os.path.exists(csv_train_file):
                # convert data
                print('convert data')
                cmd = exe_path.csv_converter + ' -i %s' % train_file + ' -o %s' % csv_train_file
                cmd += ' -sdt libsvm -ddt csv'
                print(cmd)
                os.system(cmd)

            # run mRMR; its stdout becomes the model file
            prev_cmd = train_exe_name + ' -v %d' % data_valid_dim + ' -t 0.5 -i %s' % csv_train_file
            cmd = prev_cmd + ' -n %d' % bs + ' > %s' % model_file
            print(cmd)
            start_time = time.time()
            os.system(cmd)
            end_time = time.time()

            # learning time covers the mRMR command only
            result_once[3] = float(end_time - start_time)

            # parse result
            parse_model_file(model_file, parse_file)

        # run OGD
        cmd_data = dataset.get_cmd_data_by_file(train_file, test_file, True)
        cmd = exe_path.SOL_exe_name + cmd_data + ' -m %s' % parse_file + ' -k %d' % bs
        cmd += dataset.get_model_param(ds, 'SGD-FS')
        cmd += ' -opt mRMR_OGD -norm -loss Hinge >> %s' % result_file

        print(cmd)
        os.system(cmd)

        result_once[2] = bs

        result_all.append(result_once)
    return result_all