# data - train: overwrite ytr with the XOR pattern of the first two features
# (1 when exactly one of them is >= 0.5, otherwise 0), then add Gaussian
# noise with standard deviation 0.1.
tr_lo0, tr_hi0 = Xtr[:, 0] < 0.5, Xtr[:, 0] >= 0.5
tr_lo1, tr_hi1 = Xtr[:, 1] < 0.5, Xtr[:, 1] >= 0.5
ytr[tr_lo0 & tr_lo1] = 0
ytr[tr_hi0 & tr_lo1] = 1
ytr[tr_lo0 & tr_hi1] = 1
ytr[tr_hi0 & tr_hi1] = 0
ytr += 0.1 * np.random.randn(num)

# data - test: same XOR target on freshly drawn uniform inputs, same noise.
Xte = np.random.rand(num, dim)
yte = np.logical_xor(Xte[:, 0] >= 0.5, Xte[:, 1] >= 0.5).astype(float)
yte += 0.1 * np.random.randn(num)

# save: one CSV per split, features first and the target as the last column.
dirname = './result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
np.savetxt(trfile, np.column_stack((Xtr, ytr)), delimiter=',')
np.savetxt(tefile, np.column_stack((Xte, yte)), delimiter=',')

# demo_R
Kmax = 10
restart = 20
M = range(1, 16)  # only consumed by the comparison variant noted below
paper_sub.run(prefix, Kmax, restart, plot=True)
# Comparison variant (disabled):
# paper_sub.run(prefix, Kmax, restart, plot=True, M=M, compare=True)
# Label noise: flip every training label whose uniform draw exceeds b.
ytr = np.logical_xor(ytr, np.random.rand(num) > b)

# data - test: XOR of the two threshold indicators, then the same label
# noise as the training set. (The former `yte = np.zeros(num)` was a dead
# store that was overwritten immediately, so it has been dropped.)
Xte = np.random.rand(num, dim)
yte = np.logical_xor(Xte[:, 0] > 0.5, Xte[:, 1] > 0.5)
yte = np.logical_xor(yte, np.random.rand(num) > b)

# save: features first, label as the last column (booleans are written as
# floats because np.c_ upcasts to the dtype of the feature matrix).
dirname = './result/result_%s' % (prefix,)
if not os.path.exists('./result/'):
    os.mkdir('./result/')
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
np.savetxt(trfile, np.c_[Xtr, ytr], delimiter=',')
np.savetxt(tefile, np.c_[Xte, yte], delimiter=',')

# demo_R
Kmax = 10
restart = 20
treenum = 100
M = range(1, 11)
# Variant without the comparison (disabled):
# paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='classification', plot=True, plot_line=[[t1, z1], [t2, z2]])
paper_sub.run(prefix, Kmax, restart, treenum=treenum,
              modeltype='classification', plot=True,
              plot_line=[[t1, z1], [t2, z2]], M=M, compare=True)

# plot
plotTZ('%s/%s_true.pdf' % (dirname, prefix))
plotForest('%s/%s_rf_tree05_seed00.pdf' % (dirname, prefix))
# data: header-less CSV, so columns are labelled 0..n-1; columns 9-11 are
# discarded. `axis` is passed by keyword because the positional form was
# removed in pandas 2.0.
df = pd.read_csv('./data/energy.csv', sep=',', header=None)
df = df.drop([9, 10, 11], axis=1)

# split: seeded random permutation of the rows, first ceil(ratio * num)
# rows go to the training set and the remainder to the test set.
num = len(df)
m = int(np.ceil(ratio * num))
np.random.seed(seed)
idx = np.random.permutation(num)
# `.ix` was removed in pandas 1.0. idx holds row positions, and the frame
# still has the default 0..num-1 index here, so `.iloc` selects the same rows.
df1 = df.iloc[idx[:m], :]
df2 = df.iloc[idx[m:], :]

# save
if not os.path.exists('./result/'):
    os.mkdir('./result/')
dirname = './result/result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 200
treenum = 100
M = range(1, 11)
featurename = ('Relative Compactness', 'Surface Area', 'Wall Area',
               'Roof Area', 'Overall Height', 'Orientation', 'Glazing Area',
               'Glazing Area Distribution')
# Variant without the comparison (disabled):
# paper_sub.run(prefix, Kmax, restart, modeltype='regression', plot=False, treenum=treenum, featurename=featurename)
paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='regression',
              featurename=featurename, plot=False, M=M, compare=True)
# save: make sure ./result/ and the per-experiment folder exist, then write
# each split as CSV with the features first and the label as last column.
result_root = './result/'
dirname = './result/result_%s' % (prefix, )
for folder in (result_root, dirname):
    if not os.path.exists(folder):
        os.mkdir(folder)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
np.savetxt(trfile, np.column_stack((Xtr, ytr)), delimiter=',')
np.savetxt(tefile, np.column_stack((Xte, yte)), delimiter=',')

# demo_R
Kmax = 10
restart = 20
treenum = 100
M = range(1, 11)
true_lines = [[t1, z1], [t2, z2]]
# Variant without the comparison (disabled):
# paper_sub.run(prefix, Kmax, restart, treenum=treenum, modeltype='classification', plot=True, plot_line=[[t1, z1], [t2, z2]])
paper_sub.run(
    prefix,
    Kmax,
    restart,
    treenum=treenum,
    modeltype='classification',
    plot=True,
    plot_line=true_lines,
    M=M,
    compare=True,
)

# plot
plotTZ('%s/%s_true.pdf' % (dirname, prefix))
plotForest('%s/%s_rf_tree05_seed00.pdf' % (dirname, prefix))
# Remove the rows selected in idx, then rebuild the frame from its raw values
# so that the row index is again the contiguous range 0..len(df)-1.
df = df.drop(idx)
df = pd.DataFrame(df.values)

# split: seeded random permutation of the rows; the first m1 rows form the
# training set and rows m1..m2-1 of the permutation form the test set.
num = len(df)
np.random.seed(seed)
idx = np.random.permutation(num)
# `.ix` was removed in pandas 1.0. The index was reset just above, so the
# positional `.iloc` picks exactly the rows the old label lookup did.
df1 = df.iloc[idx[:m1], :]
df2 = df.iloc[idx[m1:m2], :]

# save
if not os.path.exists('./result/'):
    os.mkdir('./result/')
dirname = './result/result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 20
treenum = 100
maxitr = 3000
tol = 1e-2
M = range(1, 11)
# Variant without the comparison (disabled):
# paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=treenum, maxitr=maxitr, tol=tol)
paper_sub.run(prefix, Kmax, restart, treenum=treenum,
              modeltype='classification', maxitr=maxitr, tol=tol,
              plot=False, M=M, compare=True)
# data - train: overwrite ytr with the XOR pattern of the first two features
# (1 when exactly one of them is >= 0.5, otherwise 0), then add Gaussian
# noise with standard deviation 0.1.
low0, high0 = Xtr[:, 0] < 0.5, Xtr[:, 0] >= 0.5
low1, high1 = Xtr[:, 1] < 0.5, Xtr[:, 1] >= 0.5
ytr[low0 & low1] = 0
ytr[high0 & low1] = 1
ytr[low0 & high1] = 1
ytr[high0 & high1] = 0
ytr += 0.1 * np.random.randn(num)

# data - test: same XOR target on freshly drawn uniform inputs, same noise.
Xte = np.random.rand(num, dim)
yte = np.logical_xor(Xte[:, 0] >= 0.5, Xte[:, 1] >= 0.5).astype(float)
yte += 0.1 * np.random.randn(num)

# save: one CSV per split, features first and the target as the last column.
dirname = './result_%s' % (prefix, )
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
np.savetxt(trfile, np.column_stack((Xtr, ytr)), delimiter=',')
np.savetxt(tefile, np.column_stack((Xte, yte)), delimiter=',')

# demo_R
Kmax = 10
restart = 20
M = range(1, 16)  # only consumed by the comparison variant noted below
paper_sub.run(prefix, Kmax, restart, plot=True)
# Comparison variant (disabled):
# paper_sub.run(prefix, Kmax, restart, plot=True, M=M, compare=True)
# split: seeded random permutation of the rows; the first m1 rows form the
# training set and rows m1..m2-1 of the permutation form the test set.
np.random.seed(seed)
idx = np.random.permutation(num)
# `.ix` was removed in pandas 1.0. idx is a permutation of row positions, so
# the positional `.iloc` is used.
# NOTE(review): this matches the old label-based lookup only if df carries
# the default 0..num-1 index at this point - confirm where df is built.
df1 = df.iloc[idx[:m1], :]
df2 = df.iloc[idx[m1:m2], :]

# save
if not os.path.exists('./result/'):
    os.mkdir('./result/')
dirname = './result/result_%s' % (prefix, )
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 20
treenum = 100
M = range(1, 11)
# Variant without the comparison (disabled):
# paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=treenum)
paper_sub.run(prefix, Kmax, restart, treenum=treenum,
              modeltype='classification', plot=False, M=M, compare=True)
# data: header-less CSV, so columns are labelled 0..n-1; columns 9-11 are
# discarded. `axis` is passed by keyword because the positional form was
# removed in pandas 2.0.
df = pd.read_csv('./data/energy.csv', sep=',', header=None)
df = df.drop([9, 10, 11], axis=1)

# split: seeded random permutation of the rows, first ceil(ratio * num)
# rows go to the training set and the remainder to the test set.
num = len(df)
m = int(np.ceil(ratio * num))
np.random.seed(seed)
idx = np.random.permutation(num)
# `.ix` was removed in pandas 1.0. idx holds row positions, and the frame
# still has the default 0..num-1 index here, so `.iloc` selects the same rows.
df1 = df.iloc[idx[:m], :]
df2 = df.iloc[idx[m:], :]

# save
dirname = './result_%s' % (prefix, )
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 20
M = range(1, 16)  # only consumed by the comparison variant noted below
featurename = ('Relative Compactness', 'Surface Area', 'Wall Area',
               'Roof Area', 'Overall Height', 'Orientation', 'Glazing Area',
               'Glazing Area Distribution')
paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False)
# Comparison variant (disabled):
# paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False, M=M, compare=True)
# Fraction of the rows assigned to the training split.
ratio = 0.5

# data: header-less CSV, so columns are labelled 0..n-1; columns 9-11 are
# discarded. `axis` is passed by keyword because the positional form was
# removed in pandas 2.0.
df = pd.read_csv('./data/energy.csv', sep=',', header=None)
df = df.drop([9, 10, 11], axis=1)

# split: seeded random permutation of the rows, first ceil(ratio * num)
# rows go to the training set and the remainder to the test set.
num = len(df)
m = int(np.ceil(ratio * num))
np.random.seed(seed)
idx = np.random.permutation(num)
# `.ix` was removed in pandas 1.0. idx holds row positions, and the frame
# still has the default 0..num-1 index here, so `.iloc` selects the same rows.
df1 = df.iloc[idx[:m], :]
df2 = df.iloc[idx[m:], :]

# save
dirname = './result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 20
M = range(1, 16)  # only consumed by the comparison variant noted below
featurename = ('Relative Compactness', 'Surface Area', 'Wall Area',
               'Roof Area', 'Overall Height', 'Orientation', 'Glazing Area',
               'Glazing Area Distribution')
paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False)
# Comparison variant (disabled):
# paper_sub.run(prefix, Kmax, restart, featurename=featurename, plot=False, M=M, compare=True)
# The test split takes the 10000 permuted rows that follow the m1 training rows.
m2 = m1 + 10000

# data: header-less CSV; rotate the columns so that the first column ends up
# last (presumably the class label, matching the "target in the last column"
# layout of the saved files - confirm against the data set description).
df = pd.read_csv('./data/HIGGS.csv', sep=',', header=None)
cols = df.columns.tolist()
cols = cols[1:] + cols[:1]
df = df[cols]

# split: seeded random permutation of the rows.
num = len(df)
np.random.seed(seed)
idx = np.random.permutation(num)
# `.ix` was removed in pandas 1.0. idx holds row positions, and the frame
# still has the default 0..num-1 index here, so `.iloc` selects the same rows.
df1 = df.iloc[idx[:m1], :]
df2 = df.iloc[idx[m1:m2], :]

# save
dirname = './result_%s' % (prefix,)
if not os.path.exists(dirname):
    os.mkdir(dirname)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 20
M = range(1, 16)  # only consumed by the comparison variant noted below
paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False,
              treenum=30, maxitr=3000, tol=1)
# Comparison variant (disabled):
# paper_sub.run(prefix, Kmax, restart, modeltype='classification', plot=False, treenum=30, maxitr=3000, tol=1, M=M, compare=True)
# save: make sure ./result/ and the per-experiment folder exist, then write
# both splits as header-less CSV files without the index column.
result_root = './result/'
dirname = './result/result_%s' % (prefix, )
for folder in (result_root, dirname):
    if not os.path.exists(folder):
        os.mkdir(folder)
trfile = '%s/%s_train.csv' % (dirname, prefix)
tefile = '%s/%s_test.csv' % (dirname, prefix)
df1.to_csv(trfile, header=None, index=False)
df2.to_csv(tefile, header=None, index=False)

# demo_R
Kmax = 10
restart = 200
treenum = 100
M = range(1, 11)
featurename = (
    'Relative Compactness',
    'Surface Area',
    'Wall Area',
    'Roof Area',
    'Overall Height',
    'Orientation',
    'Glazing Area',
    'Glazing Area Distribution',
)
# Variant without the comparison (disabled):
# paper_sub.run(prefix, Kmax, restart, modeltype='regression', plot=False, treenum=treenum, featurename=featurename)
paper_sub.run(
    prefix,
    Kmax,
    restart,
    treenum=treenum,
    modeltype='regression',
    featurename=featurename,
    plot=False,
    M=M,
    compare=True,
)