import random

# Draw a random ordering of the 1500 part indices and persist it to
# data/bigeg/shuffle.txt, one index per line.
parts = np.arange(1500)
random.shuffle(parts)
with open("data/bigeg/shuffle.txt", "w") as fout:
    fout.writelines("%d\n" % p for p in parts)

# Stack the first 500 shuffled parts into a single sparse matrix.  The
# (data, row, col) triplets are gathered per part and concatenated once at
# the end, so memory grows with the data actually read instead of requiring
# a fixed, very large buffer to be reserved up front.
# NOTE(review): readpart is defined elsewhere; it is assumed to return a
# COO-style sparse matrix exposing .row, .col, .data and .shape -- confirm.
data_chunks = []
row_chunks = []
col_chunks = []
n = 0  # rows stacked so far; doubles as the row offset of the next part
l = 0  # number of stored entries collected so far
ncol = 0  # widest part seen so far
for i in range(500):
    yxi = readpart(parts[i])
    li = len(yxi.row)
    # Values end up as float32 in the final matrix, so convert per part.
    data_chunks.append(np.asarray(yxi.data, dtype=np.float32))
    # Widen to int64 *before* adding the offset so the sum cannot wrap
    # around in a narrower index dtype.
    row_chunks.append(np.asarray(yxi.row, dtype=np.int64) + n)
    col_chunks.append(np.asarray(yxi.col, dtype=np.int64))
    n += yxi.shape[0]
    ncol = max(ncol, yxi.shape[1])
    l += li
    print((i, n, l))

# Release each chunk list as soon as it has been merged to limit peak memory.
yx_data = np.concatenate(data_chunks)
del data_chunks
yx_row = np.concatenate(row_chunks)
del row_chunks
yx_col = np.concatenate(col_chunks)
del col_chunks

# Give the shape explicitly: when it is inferred from the largest index,
# trailing all-zero rows or columns are dropped and the matrix no longer
# has n rows / the parts' column count.
yx = sparse.csc_matrix((yx_data, (yx_row, yx_col)), shape=(n, ncol))

# Drop the references to the triplet arrays now that the matrix is built.
yx_data = None
yx_row = None
yx_col = None
import random

# Draw a random ordering of the 1500 part indices and persist it to
# data/bigeg/shuffle.txt, one index per line.
parts = np.arange(1500)
random.shuffle(parts)
with open("data/bigeg/shuffle.txt", "w") as fout:
    fout.writelines("%d\n" % p for p in parts)

# Stack the first 500 shuffled parts into a single sparse matrix.  The
# (data, row, col) triplets are gathered per part and concatenated once at
# the end, so memory grows with the data actually read instead of requiring
# a fixed, very large buffer to be reserved up front.
# NOTE(review): readpart is defined elsewhere; it is assumed to return a
# COO-style sparse matrix exposing .row, .col, .data and .shape -- confirm.
data_chunks = []
row_chunks = []
col_chunks = []
n = 0  # rows stacked so far; doubles as the row offset of the next part
l = 0  # number of stored entries collected so far
ncol = 0  # widest part seen so far
for i in range(500):
    yxi = readpart(parts[i])
    li = len(yxi.row)
    # Values end up as float32 in the final matrix, so convert per part.
    data_chunks.append(np.asarray(yxi.data, dtype=np.float32))
    # Widen to int64 *before* adding the offset so the sum cannot wrap
    # around in a narrower index dtype.
    row_chunks.append(np.asarray(yxi.row, dtype=np.int64) + n)
    col_chunks.append(np.asarray(yxi.col, dtype=np.int64))
    n += yxi.shape[0]
    ncol = max(ncol, yxi.shape[1])
    l += li
    print((i, n, l))

# Release each chunk list as soon as it has been merged to limit peak memory.
yx_data = np.concatenate(data_chunks)
del data_chunks
yx_row = np.concatenate(row_chunks)
del row_chunks
yx_col = np.concatenate(col_chunks)
del col_chunks

# Give the shape explicitly: when it is inferred from the largest index,
# trailing all-zero rows or columns are dropped and the matrix no longer
# has n rows / the parts' column count.
yx = sparse.csc_matrix((yx_data, (yx_row, yx_col)), shape=(n, ncol))

# Drop the references to the triplet arrays now that the matrix is built.
yx_data = None
yx_row = None
yx_col = None
sampforest.verbose = 0


def missclass(yx):
    """Count misclassified rows of *yx* for both models.

    Column 0 of *yx* is read as the integer class label and the remaining
    columns as the features.  Each row is routed through ``trunk`` to get a
    branch id, predicted by the matching entry of ``branches``, and
    separately predicted by ``sampforest``.

    NOTE(review): ``trunk``, ``branches``, ``sampforest`` and ``tree`` are
    defined elsewhere in the file; ``branches`` is assumed to be keyed by
    the string form of the ids returned by ``trunk.tree_.apply`` -- confirm.

    Args:
        yx: sparse matrix supporting row/column slicing (the caller passes
            a CSR matrix).

    Returns:
        Tuple ``(n, mcebf, mcrsf)``: number of rows, number of rows the
        branch models got wrong, number of rows ``sampforest`` got wrong.
    """
    n = yx.shape[0]
    # ravel() keeps y one-dimensional even when the part has a single row.
    y = yx[:, 0].toarray().ravel().astype(int)
    features = yx[:, 1:]

    # NOTE(review): rows whose branch id has no entry in ``branches`` keep
    # the uninitialised values from np.empty -- verify every id is covered.
    yhat_ebf = np.empty(n, dtype=int)
    bvec = trunk.tree_.apply(features.astype(tree._tree.DTYPE)).astype("str")
    for b in branches:
        mask = bvec == b
        # A branch may receive no rows from this part; skip it instead of
        # asking the model to predict on an empty selection.
        if not mask.any():
            continue
        yhat_ebf[mask] = branches[b].predict(features[mask, :])

    yhat_rsf = sampforest.predict(features)
    mcrsf = np.sum(yhat_rsf != y)
    mcebf = np.sum(yhat_ebf != y)
    return n, mcebf, mcrsf


# Score parts 500..1499 of the saved shuffle order, writing one
# "n mcebf mcrsf" line per part and printing the running totals.
parts = np.loadtxt("data/bigeg/shuffle.txt", dtype=int).tolist()
mc = (0, 0, 0)
# The context manager closes the results file even if a part fails.
with open("results/bigeg/scores.txt", "w") as fout:
    for i in range(500, 1500):
        print(i, end=": ")
        yxi = sparse.csr_matrix(readpart(parts[i]))
        mci = missclass(yxi)
        mc = tuple(total + part for total, part in zip(mc, mci))
        fout.write("%d %d %d\n" % mci)
        print(mc)
def missclass(yx):
    """Count misclassified rows of *yx* for both models.

    Column 0 of *yx* is read as the integer class label and the remaining
    columns as the features.  Each row is routed through ``trunk`` to get a
    branch id, predicted by the matching entry of ``branches``, and
    separately predicted by ``sampforest``.

    NOTE(review): ``trunk``, ``branches``, ``sampforest`` and ``tree`` are
    defined elsewhere in the file; ``branches`` is assumed to be keyed by
    the string form of the ids returned by ``trunk.tree_.apply`` -- confirm.

    Args:
        yx: sparse matrix supporting row/column slicing (the caller passes
            a CSR matrix).

    Returns:
        Tuple ``(n, mcebf, mcrsf)``: number of rows, number of rows the
        branch models got wrong, number of rows ``sampforest`` got wrong.
    """
    n = yx.shape[0]
    # ravel() keeps y one-dimensional even when the part has a single row.
    y = yx[:, 0].toarray().ravel().astype(int)
    features = yx[:, 1:]

    # NOTE(review): rows whose branch id has no entry in ``branches`` keep
    # the uninitialised values from np.empty -- verify every id is covered.
    yhat_ebf = np.empty(n, dtype=int)
    bvec = trunk.tree_.apply(features.astype(tree._tree.DTYPE)).astype("str")
    for b in branches:
        mask = bvec == b
        # A branch may receive no rows from this part; skip it instead of
        # asking the model to predict on an empty selection.
        if not mask.any():
            continue
        yhat_ebf[mask] = branches[b].predict(features[mask, :])

    yhat_rsf = sampforest.predict(features)
    mcrsf = np.sum(yhat_rsf != y)
    mcebf = np.sum(yhat_ebf != y)
    return n, mcebf, mcrsf


# Score parts 500..1499 of the saved shuffle order, writing one
# "n mcebf mcrsf" line per part and printing the running totals.
parts = np.loadtxt("data/bigeg/shuffle.txt", dtype=int).tolist()
mc = (0, 0, 0)
# The context manager closes the results file even if a part fails.
with open("results/bigeg/scores.txt", "w") as fout:
    for i in range(500, 1500):
        print(i, end=": ")
        yxi = sparse.csr_matrix(readpart(parts[i]))
        mci = missclass(yxi)
        mc = tuple(total + part for total, part in zip(mc, mci))
        fout.write("%d %d %d\n" % mci)
        print(mc)