Ejemplo n.º 1
0
import random

# Put the 1500 part ids in random order and record that order on disk,
# one id per line, so that later runs can reload the same split.
parts = np.arange(1500)
random.shuffle(parts)
with open("data/bigeg/shuffle.txt", "w") as fout:
    fout.writelines("%d\n" % part_id for part_id in parts)

# Stack the first 500 shuffled parts into a single sparse matrix `yx`.
# Each part returned by readpart() is used through its .data/.row/.col
# triplets and its .shape; the triplets are copied into preallocated
# buffers, with row indices shifted by the number of rows stacked so far.
buffmax = int(1e10)  # capacity (in stored entries) of each triplet buffer
yx_data = np.empty(buffmax)
yx_row = np.empty(buffmax, dtype=int)
yx_col = np.empty(buffmax, dtype=int)
n = 0  # rows stacked so far; doubles as the row offset of the next part
l = 0  # stored entries copied so far; doubles as the buffer write position
yx_ncol = 0  # widest part seen so far
for i in range(500):
    yxi = readpart(parts[i])
    li = len(yxi.row)
    yx_data[l:(l + li)] = yxi.data
    yx_row[l:(l + li)] = yxi.row + n
    yx_col[l:(l + li)] = yxi.col
    n += yxi.shape[0]
    yx_ncol = max(yx_ncol, yxi.shape[1])
    l += li
    print((i, n, l))

# Keep only the filled prefix of each buffer; values are stored as float32.
yx_data = yx_data[:l].astype(np.float32)
yx_row = yx_row[:l]
yx_col = yx_col[:l]
# Give the shape explicitly. Without it the dimensions are inferred from the
# largest row/column index present, which silently drops trailing rows or
# columns that happen to contain no stored entries.
yx = sparse.csc_matrix((yx_data, (yx_row, yx_col)), shape=(n, yx_ncol))
# Drop the references to the triplet buffers so their memory can be reclaimed.
yx_data = None
yx_row = None
yx_col = None
Ejemplo n.º 2
0
import random

# Randomly permute the 1500 part ids, then save the permutation (one id per
# line) so the same ordering can be read back by later steps.
parts = np.arange(1500)
random.shuffle(parts)
with open("data/bigeg/shuffle.txt", "w") as fout:
    fout.write("".join("%d\n" % part_id for part_id in parts))

# Stack the first 500 shuffled parts into a single sparse matrix `yx`.
# Each part returned by readpart() is used through its .data/.row/.col
# triplets and its .shape; the triplets are copied into preallocated
# buffers, with row indices shifted by the number of rows stacked so far.
buffmax = int(1e10)  # capacity (in stored entries) of each triplet buffer
yx_data = np.empty(buffmax)
yx_row = np.empty(buffmax, dtype=int)
yx_col = np.empty(buffmax, dtype=int)
n = 0  # rows stacked so far; doubles as the row offset of the next part
l = 0  # stored entries copied so far; doubles as the buffer write position
yx_ncol = 0  # widest part seen so far
for i in range(500):
    yxi = readpart(parts[i])
    li = len(yxi.row)
    yx_data[l:(l + li)] = yxi.data
    yx_row[l:(l + li)] = yxi.row + n
    yx_col[l:(l + li)] = yxi.col
    n += yxi.shape[0]
    yx_ncol = max(yx_ncol, yxi.shape[1])
    l += li
    print((i, n, l))

# Keep only the filled prefix of each buffer; values are stored as float32.
yx_data = yx_data[:l].astype(np.float32)
yx_row = yx_row[:l]
yx_col = yx_col[:l]
# Give the shape explicitly. Without it the dimensions are inferred from the
# largest row/column index present, which silently drops trailing rows or
# columns that happen to contain no stored entries.
yx = sparse.csc_matrix((yx_data, (yx_row, yx_col)), shape=(n, yx_ncol))
# Drop the references to the triplet buffers so their memory can be reclaimed.
yx_data = None
yx_row = None
yx_col = None
Ejemplo n.º 3
0
# Set the `verbose` attribute of `sampforest` to 0 before scoring.
# NOTE(review): presumably this silences progress output from
# sampforest.predict() during the scoring loop below -- confirm against the
# estimator's documentation.
sampforest.verbose = 0


def missclass(yx):
    """Count misclassified rows for the two competing models.

    Column 0 of ``yx`` is read as the integer class label and the remaining
    columns are handed to the models as features. ``yx`` must support
    ``.toarray()`` on a column slice (the caller passes a scipy CSR matrix).

    Two sets of predictions are compared against the labels:

    * ``yhat_ebf``: every row is keyed by the stringified output of
      ``trunk.tree_.apply``; the model stored in ``branches`` under that
      key predicts the rows that share it.
    * ``yhat_rsf``: ``sampforest.predict`` applied to all rows at once.

    Relies on the module-level names ``trunk``, ``branches``,
    ``sampforest`` and ``tree``.

    Returns:
        Tuple ``(n, mcebf, mcrsf)``: the number of rows, the number of rows
        whose branch prediction differs from the label, and the number of
        rows whose ``sampforest`` prediction differs from the label.
    """
    n = yx.shape[0]
    y = yx[:, 0].toarray().squeeze().astype(int)

    # Keys of `branches` are matched against these strings, hence the cast.
    bvec = trunk.tree_.apply(yx[:, 1:].astype(tree._tree.DTYPE)).astype("str")

    # NOTE(review): the buffer is uninitialised, so any row whose key has no
    # entry in `branches` keeps an arbitrary value and is scored against it.
    yhat_ebf = np.empty(n, dtype=int)
    for b, branch_model in branches.items():
        in_branch = bvec == b  # computed once and reused for both sides
        if not np.any(in_branch):
            # Nothing to assign; also avoids handing a zero-row matrix to
            # the branch model, which it may reject.
            continue
        yhat_ebf[in_branch] = branch_model.predict(yx[in_branch, 1:])

    yhat_rsf = sampforest.predict(yx[:, 1:])
    mcrsf = np.sum(yhat_rsf != y)
    mcebf = np.sum(yhat_ebf != y)
    return (n, mcebf, mcrsf)


# Score parts 500..1499 of the saved shuffle order. For each part, the
# (n, mcebf, mcrsf) triple returned by missclass() is written as one line of
# the scores file and added element-wise to the running total `mc`, which is
# echoed to stdout after the part index.
parts = np.loadtxt("data/bigeg/shuffle.txt", dtype=int).tolist()
mc = (0, 0, 0)

# The context manager closes (and flushes) the scores file even when reading
# or scoring a part raises part-way through the loop.
with open("results/bigeg/scores.txt", "w") as fout:
    for i in range(500, 1500):
        print(i, end=": ")
        yxi = sparse.csr_matrix(readpart(parts[i]))
        mci = missclass(yxi)
        mc = tuple(map(sum, zip(mc, mci)))
        fout.write("%d %d %d\n" % mci)
        print(mc)
Ejemplo n.º 4
0
def missclass(yx):
    """Count misclassified rows for the two competing models.

    Column 0 of ``yx`` is read as the integer class label and the remaining
    columns are handed to the models as features. ``yx`` must support
    ``.toarray()`` on a column slice (the caller passes a scipy CSR matrix).

    Two sets of predictions are compared against the labels:

    * ``yhat_ebf``: every row is keyed by the stringified output of
      ``trunk.tree_.apply``; the model stored in ``branches`` under that
      key predicts the rows that share it.
    * ``yhat_rsf``: ``sampforest.predict`` applied to all rows at once.

    Relies on the module-level names ``trunk``, ``branches``,
    ``sampforest`` and ``tree``.

    Returns:
        Tuple ``(n, mcebf, mcrsf)``: the number of rows, the number of rows
        whose branch prediction differs from the label, and the number of
        rows whose ``sampforest`` prediction differs from the label.
    """
    n = yx.shape[0]
    y = yx[:, 0].toarray().squeeze().astype(int)

    # Keys of `branches` are matched against these strings, hence the cast.
    bvec = trunk.tree_.apply(yx[:, 1:].astype(tree._tree.DTYPE)).astype("str")

    # NOTE(review): the buffer is uninitialised, so any row whose key has no
    # entry in `branches` keeps an arbitrary value and is scored against it.
    yhat_ebf = np.empty(n, dtype=int)
    for b, branch_model in branches.items():
        in_branch = bvec == b  # computed once and reused for both sides
        if not np.any(in_branch):
            # Nothing to assign; also avoids handing a zero-row matrix to
            # the branch model, which it may reject.
            continue
        yhat_ebf[in_branch] = branch_model.predict(yx[in_branch, 1:])

    yhat_rsf = sampforest.predict(yx[:, 1:])
    mcrsf = np.sum(yhat_rsf != y)
    mcebf = np.sum(yhat_ebf != y)
    return (n, mcebf, mcrsf)


# Score parts 500..1499 of the saved shuffle order. For each part, the
# (n, mcebf, mcrsf) triple returned by missclass() is written as one line of
# the scores file and added element-wise to the running total `mc`, which is
# echoed to stdout after the part index.
parts = np.loadtxt("data/bigeg/shuffle.txt", dtype=int).tolist()
mc = (0, 0, 0)

# The context manager closes (and flushes) the scores file even when reading
# or scoring a part raises part-way through the loop.
with open("results/bigeg/scores.txt", "w") as fout:
    for i in range(500, 1500):
        print(i, end=": ")
        yxi = sparse.csr_matrix(readpart(parts[i]))
        mci = missclass(yxi)
        mc = tuple(map(sum, zip(mc, mci)))
        fout.write("%d %d %d\n" % mci)
        print(mc)