Beispiel #1
0
# Noisy XOR regression target on the training inputs: the label is 1 when
# exactly one of the first two features is >= 0.5, and 0 otherwise.
tr_x0_low, tr_x0_high = Xtr[:, 0] < 0.5, Xtr[:, 0] >= 0.5
tr_x1_low, tr_x1_high = Xtr[:, 1] < 0.5, Xtr[:, 1] >= 0.5
ytr[tr_x0_low & tr_x1_low] = 0
ytr[tr_x0_high & tr_x1_low] = 1
ytr[tr_x0_low & tr_x1_high] = 1
ytr[tr_x0_high & tr_x1_high] = 0
# Additive Gaussian noise with standard deviation 0.1.
ytr += 0.1 * np.random.randn(num)

# Test split: fresh uniform inputs labelled with the same quadrant rule.
Xte = np.random.rand(num, dim)
yte = np.zeros(num)
te_x0_low, te_x0_high = Xte[:, 0] < 0.5, Xte[:, 0] >= 0.5
te_x1_low, te_x1_high = Xte[:, 1] < 0.5, Xte[:, 1] >= 0.5
yte[te_x0_low & te_x1_low] = 0
yte[te_x0_high & te_x1_low] = 1
yte[te_x0_low & te_x1_high] = 1
yte[te_x0_high & te_x1_high] = 0
yte += 0.1 * np.random.randn(num)

# Write each split as a comma-separated file: feature columns first, target
# in the last column.
dirname = './result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
for path, features, target in ((trfile, Xtr, ytr), (tefile, Xte, yte)):
    np.savetxt(path, np.column_stack((features, target)), delimiter=',')

# Settings handed to paper_sub.run.
Kmax = 10
restart = 20
M = range(1, 16)  # only used by the alternative comparison run below
paper_sub.run(prefix, Kmax, restart, plot=True)
#paper_sub.run(prefix, Kmax, restart, plot=True, M=M, compare=True)
 # Label noise: XOR the training labels with a random mask that is True
 # wherever a uniform draw exceeds b, which flips exactly those entries.
 ytr = np.logical_xor(ytr, np.random.rand(num) > b)

 # data - test
 Xte = np.random.rand(num, dim)
 # Clean label is True when exactly one of the first two features is > 0.5.
 # (A preceding np.zeros(num) initialisation was dropped: it was overwritten
 # here before ever being read.)
 yte = np.logical_xor(Xte[:, 0] > 0.5, Xte[:, 1] > 0.5)
 # Same label-noise scheme as for the training labels.
 yte = np.logical_xor(yte, np.random.rand(num) > b)

 # save: one comma-separated file per split, label in the last column
 dirname = './result/result_%s' % (prefix,)
 if not os.path.exists('./result/'):
     os.mkdir('./result/')
 if not os.path.exists(dirname):
     os.mkdir(dirname)
 trfile = '%s/%s_train.csv' % (dirname, prefix)
 tefile = '%s/%s_test.csv' % (dirname, prefix)
 np.savetxt(trfile, np.c_[Xtr, ytr], delimiter=',')
 np.savetxt(tefile, np.c_[Xte, yte], delimiter=',')

 # demo_R: settings handed to paper_sub.run
 Kmax = 10
 restart = 20
 treenum = 100
 M = range(1, 11)
 # Alternative run without the M / compare options:
 #paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='classification', plot=True, plot_line=[[t1, z1], [t2, z2]])
 paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='classification', plot=True, plot_line=[[t1, z1], [t2, z2]], M=M, compare=True)

 # plot: figures are written next to the CSV files
 plotTZ('%s/%s_true.pdf' % (dirname, prefix))
 plotForest('%s/%s_rf_tree05_seed00.pdf' % (dirname, prefix))
 
# Load the table; the file is read without a header row, so columns are
# labelled by integer position.
df = pd.read_csv('./data/energy.csv', sep=',', header=None)
# Drop columns 9-11. `axis` is passed by keyword because current pandas no
# longer accepts it positionally in DataFrame.drop.
df = df.drop([9, 10, 11], axis=1)

# split: the first m shuffled rows go to training, the remainder to test
num = len(df)
m = int(np.ceil(ratio * num))
np.random.seed(seed)  # fixed seed so the split is reproducible
idx = np.random.permutation(num)
# `.ix` was removed from pandas; rows are taken by position instead. With
# the default integer index from read_csv this selects the same rows.
df1 = df.iloc[idx[:m], :]
df2 = df.iloc[idx[m:], :]

# save: both splits are written without header or index column
if not os.path.exists('./result/'):
    os.mkdir('./result/')
dirname = './result/result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R: settings handed to paper_sub.run
Kmax = 10
restart = 200
treenum = 100
M = range(1, 11)
featurename = ('Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height', 'Orientation', 'Glazing Area', 'Glazing Area Distribution')
# Alternative run without the M / compare options:
#paper_sub.run(prefix, Kmax, restart, modeltype='regression', plot=False, treenum=treenum, featurename=featurename)
paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='regression', featurename=featurename, plot=False, M=M, compare=True)
Beispiel #4
0
    # Make sure the parent folder and the per-experiment folder both exist.
    dirname = './result/result_%s' % (prefix, )
    for folder in ('./result/', dirname):
        if not os.path.exists(folder):
            os.mkdir(folder)

    # One comma-separated file per split, label in the last column.
    trfile = '%s/%s_train.csv' % (dirname, prefix)
    np.savetxt(trfile, np.c_[Xtr, ytr], delimiter=',')
    tefile = '%s/%s_test.csv' % (dirname, prefix)
    np.savetxt(tefile, np.c_[Xte, yte], delimiter=',')

    # Settings handed to paper_sub.run.
    Kmax = 10
    restart = 20
    treenum = 100
    M = range(1, 11)
    # Alternative run without the M / compare options:
    #paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='classification', plot=True, plot_line=[[t1, z1], [t2, z2]])
    run_options = dict(
        treenum=treenum,
        modeltype='classification',
        plot=True,
        plot_line=[[t1, z1], [t2, z2]],
        M=M,
        compare=True,
    )
    paper_sub.run(prefix, Kmax, restart, **run_options)

    # Figures are written next to the CSV files.
    true_pdf = '%s/%s_true.pdf' % (dirname, prefix)
    plotTZ(true_pdf)
    forest_pdf = '%s/%s_rf_tree05_seed00.pdf' % (dirname, prefix)
    plotForest(forest_pdf)
# Remove the rows labelled by idx, then rebuild the frame from its raw values
# so that row and column labels restart from 0.
df = df.drop(idx)
df = pd.DataFrame(df.values)

# split: rows [0, m1) of the shuffle go to training, [m1, m2) to test
num = len(df)
np.random.seed(seed)  # fixed seed so the split is reproducible
idx = np.random.permutation(num)
# `.ix` was removed from pandas; rows are taken by position instead. After
# the re-index above, positions and labels coincide, so the same rows are
# selected as before.
df1 = df.iloc[idx[:m1], :]
df2 = df.iloc[idx[m1:m2], :]

# save: both splits are written without header or index column
if not os.path.exists('./result/'):
    os.mkdir('./result/')
dirname = './result/result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R: settings handed to paper_sub.run
Kmax = 10
restart = 20
treenum = 100
maxitr = 3000
tol = 1e-2
M = range(1, 11)
# Alternative run without the M / compare options:
#paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=treenum, maxitr=maxitr, tol=tol)
paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='classification', maxitr=maxitr, tol=tol, plot=False, M=M, compare=True)
Beispiel #6
0
# Training target: noisy XOR of the first two features thresholded at 0.5
# (1 when exactly one of them is >= 0.5, otherwise 0).
x0_small, x1_small = Xtr[:, 0] < 0.5, Xtr[:, 1] < 0.5
x0_large, x1_large = Xtr[:, 0] >= 0.5, Xtr[:, 1] >= 0.5
ytr[x0_small & x1_small] = 0
ytr[x0_large & x1_small] = 1
ytr[x0_small & x1_large] = 1
ytr[x0_large & x1_large] = 0
# Additive Gaussian noise with standard deviation 0.1.
ytr += 0.1 * np.random.randn(num)

# Test split: new uniform inputs, same labelling rule and noise level.
Xte = np.random.rand(num, dim)
yte = np.zeros(num)
x0_small, x1_small = Xte[:, 0] < 0.5, Xte[:, 1] < 0.5
x0_large, x1_large = Xte[:, 0] >= 0.5, Xte[:, 1] >= 0.5
yte[x0_small & x1_small] = 0
yte[x0_large & x1_small] = 1
yte[x0_small & x1_large] = 1
yte[x0_large & x1_large] = 0
yte += 0.1 * np.random.randn(num)

# Persist both splits as CSV (features first, target in the last column).
dirname = './result_%s' % (prefix, )
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
train_table = np.column_stack((Xtr, ytr))
test_table = np.column_stack((Xte, yte))
np.savetxt(trfile, train_table, delimiter=',')
np.savetxt(tefile, test_table, delimiter=',')

# Settings handed to paper_sub.run.
Kmax = 10
restart = 20
M = range(1, 16)  # only used by the alternative comparison run below
paper_sub.run(prefix, Kmax, restart, plot=True)
#paper_sub.run(prefix, Kmax, restart, plot=True, M=M, compare=True)
Beispiel #7
0
# split: rows [0, m1) of the shuffle go to training, [m1, m2) to test
np.random.seed(seed)  # fixed seed so the split is reproducible
idx = np.random.permutation(num)
# `.ix` was removed from pandas; rows are taken by position instead.
# NOTE(review): assumes num == len(df) and that df carries a default 0..n-1
# index, in which case this picks the same rows as before -- confirm against
# the code that builds df.
df1 = df.iloc[idx[:m1], :]
df2 = df.iloc[idx[m1:m2], :]

# save: both splits are written without header or index column
if not os.path.exists('./result/'):
    os.mkdir('./result/')
dirname = './result/result_%s' % (prefix, )
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R: settings handed to paper_sub.run
Kmax = 10
restart = 20
treenum = 100
M = range(1, 11)
# Alternative run without the M / compare options:
#paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=treenum)
paper_sub.run(prefix,
              Kmax,
              restart,
              treenum=treenum,
              modeltype='classification',
              plot=False,
              M=M,
              compare=True)
Beispiel #8
0
# data: the file is read without a header row, so columns are labelled by
# integer position
df = pd.read_csv('./data/energy.csv', sep=',', header=None)
# Drop columns 9-11. `axis` is passed by keyword because current pandas no
# longer accepts it positionally in DataFrame.drop.
df = df.drop([9, 10, 11], axis=1)

# split: the first m shuffled rows go to training, the remainder to test
num = len(df)
m = int(np.ceil(ratio * num))
np.random.seed(seed)  # fixed seed so the split is reproducible
idx = np.random.permutation(num)
# `.ix` was removed from pandas; rows are taken by position instead. With
# the default integer index from read_csv this selects the same rows.
df1 = df.iloc[idx[:m], :]
df2 = df.iloc[idx[m:], :]

# save: both splits are written without header or index column
dirname = './result_%s' % (prefix, )
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R: settings handed to paper_sub.run
Kmax = 10
restart = 20
M = range(1, 16)  # only used by the alternative comparison run below
featurename = ('Relative Compactness', 'Surface Area', 'Wall Area',
               'Roof Area', 'Overall Height', 'Orientation', 'Glazing Area',
               'Glazing Area Distribution')
paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False)
#paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False, M=M, compare=True)
Beispiel #9
0
# Fraction of rows assigned to the training split.
ratio = 0.5

# data: the file is read without a header row, so columns are labelled by
# integer position
df = pd.read_csv('./data/energy.csv', sep=',', header=None)
# Drop columns 9-11. `axis` is passed by keyword because current pandas no
# longer accepts it positionally in DataFrame.drop.
df = df.drop([9, 10, 11], axis=1)

# split: the first m shuffled rows go to training, the remainder to test
num = len(df)
m = int(np.ceil(ratio * num))
np.random.seed(seed)  # fixed seed so the split is reproducible
idx = np.random.permutation(num)
# `.ix` was removed from pandas; rows are taken by position instead. With
# the default integer index from read_csv this selects the same rows.
df1 = df.iloc[idx[:m], :]
df2 = df.iloc[idx[m:], :]

# save: both splits are written without header or index column
dirname = './result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R: settings handed to paper_sub.run
Kmax = 10
restart = 20
M = range(1, 16)  # only used by the alternative comparison run below
featurename = ('Relative Compactness', 'Surface Area', 'Wall Area', 'Roof Area', 'Overall Height', 'Orientation', 'Glazing Area', 'Glazing Area Distribution')
paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False)
#paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False, M=M, compare=True)
Beispiel #10
0
# The test split is the 10000 shuffled rows that follow the m1 training rows.
m2 = m1 + 10000

# data: the file is read without a header row
df = pd.read_csv('./data/HIGGS.csv', sep=',', header=None)
# Rotate the columns so that the first column becomes the last one
# (presumably the class label, to match a features-then-target layout --
# confirm against the data set description).
cols = df.columns.tolist()
cols = cols[1:] + cols[:1]
df = df[cols]

# split
num = len(df)
np.random.seed(seed)  # fixed seed so the split is reproducible
idx = np.random.permutation(num)
# `.ix` was removed from pandas; rows are taken by position instead. With
# the default integer index from read_csv this selects the same rows.
df1 = df.iloc[idx[:m1], :]
df2 = df.iloc[idx[m1:m2], :]

# save: both splits are written without header or index column
dirname = './result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R: settings handed to paper_sub.run
Kmax = 10
restart = 20
M = range(1, 16)  # only used by the alternative comparison run below
paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=30, maxitr=3000, tol=1)
#paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=30, maxitr=3000, tol=1, M=M, compare=True)
Beispiel #11
0
# Make sure the parent folder and the per-experiment folder both exist.
dirname = './result/result_%s' % (prefix, )
for folder in ('./result/', dirname):
    if not os.path.exists(folder):
        os.mkdir(folder)

# Both splits are written without header or index column.
trfile = '%s/%s_train.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df2.to_csv(tefile, header=None, index=False)

# Settings handed to paper_sub.run.
Kmax = 10
restart = 200
treenum = 100
M = range(1, 11)
featurename = (
    'Relative Compactness',
    'Surface Area',
    'Wall Area',
    'Roof Area',
    'Overall Height',
    'Orientation',
    'Glazing Area',
    'Glazing Area Distribution',
)
# Alternative run without the M / compare options:
#paper_sub.run(prefix, Kmax, restart, modeltype='regression', plot=False, treenum=treenum, featurename=featurename)
run_options = dict(
    treenum=treenum,
    modeltype='regression',
    featurename=featurename,
    plot=False,
    M=M,
    compare=True,
)
paper_sub.run(prefix, Kmax, restart, **run_options)