Esempio n. 1
0
def predict(dir, X_pred):
    """Predict ``X_pred`` with a random subset of the forest in ``dir + '.hdf5'``.

    A shuffled fraction of the stored trees (controlled by the module-level
    ``treefraction``) is read from the HDF5 file.  The per-tree ``trunks``,
    ``compares`` and ``leafs`` datasets are padded to rectangular arrays and
    handed to ``_predict_parallel.predict`` together with a zero-initialised
    output buffer, which that routine is expected to fill in place.

    Args:
        dir: Path prefix of the forest; ``'.hdf5'`` is appended to it.
        X_pred: Samples to predict; converted with ``np.asarray``.

    Returns:
        The ``predictions`` array of length ``len(X_pred)``.

    Raises:
        IOError: If ``dir + '.hdf5'`` does not exist.
    """
    forest_path = dir + '.hdf5'
    if not os.path.exists(forest_path):
        raise IOError(
            'Directory {:s} does not contain proper random forest!\n'.format(
                dir))

    predictions = np.zeros(len(X_pred))
    X_pred = np.asarray(X_pred)

    trunks = []
    compares = []
    leafs = []

    with h5py.File(forest_path, "r") as h5f:
        # keys() is a non-shuffleable view on Python 3; materialise it so
        # random.shuffle can reorder it in place on every Python version.
        trees = list(h5f.keys())
        random.shuffle(trees)
        trees = trees[:int(len(trees) * treefraction)]

        for name in trees:
            # [()] reads each dataset fully into memory before the file closes.
            trunks.append(h5f[name + '/trunks'][()])
            compares.append(h5f[name + '/compares'][()])
            leafs.append(h5f[name + '/leafs'][()])

    # Trees differ in size, so pad every per-tree array up to the largest one.
    # NOTE(review): NaN has no int64 representation, so the padding value
    # produced by this fill is platform dependent; presumably the padded
    # cells are never read by the kernel -- confirm.
    shape = (len(trunks),
             max(t.shape[0] for t in trunks),
             max(t.shape[1] for t in trunks))
    trunks_ = np.full(shape, np.nan, dtype=np.int64)
    for i, trunk in enumerate(trunks):
        trunks_[i, :trunk.shape[0], :trunk.shape[1]] = trunk

    leafs_ = np.full((len(leafs), max(l.shape[0] for l in leafs)),
                     np.nan,
                     dtype=np.float64)
    for i, leaf in enumerate(leafs):
        leafs_[i, :leaf.shape[0]] = leaf

    compares_ = np.full((len(compares), max(c.shape[0] for c in compares)),
                        np.nan,
                        dtype=np.float64)
    for i, compare in enumerate(compares):
        compares_[i, :compare.shape[0]] = compare

    # Drop the per-tree lists before the prediction call to lower peak memory.
    del leafs, trunks, compares

    _predict_parallel.predict(trunks_,
                              leafs_,
                              compares_,
                              X_pred,
                              predictions,
                              num_threads=num_threads)
    # Single-argument print() emits the same output on Python 2 and 3.
    print(predictions)
    return predictions
Esempio n. 2
0
def predict(dir, X_pred):
    """Predict ``X_pred`` with a random subset of the forest stored in ``dir``.

    ``dir + '/tree.list'`` lists one tree file per line.  A shuffled fraction
    of those entries (controlled by the module-level ``treefraction``) is
    loaded with ``joblib`` from ``dir``, converted to padded rectangular
    arrays and handed to ``_predict_parallel.predict`` together with a
    zero-initialised output buffer, which that routine is expected to fill
    in place.

    Args:
        dir: Directory holding ``tree.list`` and the tree files.
        X_pred: Samples to predict; converted with ``np.asarray``.

    Returns:
        The ``predictions`` array of length ``len(X_pred)``.

    Raises:
        IOError: If ``dir + '/tree.list'`` does not exist.
    """
    list_path = dir + '/tree.list'
    if not os.path.exists(list_path):
        raise IOError('Directory {:s} does not contain proper random forest!\n'.format(dir))

    # Read the list inside a context manager so the handle is always closed.
    with open(list_path) as list_file:
        trees = list_file.read().strip().split('\n')
    random.shuffle(trees)
    trees = trees[:int(len(trees) * treefraction)]
    predictions = np.zeros(len(X_pred))
    X_pred = np.asarray(X_pred)

    trunks = []
    compares = []
    leafs = []

    for entry in trees:
        # Only the base name of each entry is used; the file is looked up
        # inside `dir` regardless of the path recorded in tree.list.
        # NOTE: joblib.load unpickles its input -- only load trusted forests.
        tree = joblib.load(dir + '/' + entry.split('/')[-1])[:5]
        trunks.append(np.vstack((tree[0], tree[2:4])))
        compares.append(tree[1])
        leaf = tree[4][:, 0, :]
        # Share of column 1 in each leaf's row total.
        # NOTE(review): presumably `leaf` is a float array; with integer
        # dtype on Python 2 this `/` would truncate -- confirm.
        leafs.append(leaf[:, 1] / leaf.sum(axis=1))

    # Trees differ in size, so pad every per-tree array up to the largest one.
    # NOTE(review): NaN has no int64 representation, so the padding value
    # produced by this fill is platform dependent; presumably the padded
    # cells are never read by the kernel -- confirm.
    shape = (len(trunks), max(t.shape[0] for t in trunks), max(t.shape[1] for t in trunks))
    trunks_ = np.full(shape, np.nan, dtype=np.int64)
    for i, trunk in enumerate(trunks):
        trunks_[i, :trunk.shape[0], :trunk.shape[1]] = trunk

    leafs_ = np.full((len(leafs), max(l.shape[0] for l in leafs)), np.nan, dtype=np.float64)
    for i, leaf_values in enumerate(leafs):
        leafs_[i, :leaf_values.shape[0]] = leaf_values

    compares_ = np.full((len(compares), max(c.shape[0] for c in compares)), np.nan, dtype=np.float64)
    for i, compare in enumerate(compares):
        compares_[i, :compare.shape[0]] = compare

    # Drop the per-tree lists before the prediction call to lower peak memory.
    del leafs, trunks, compares

    _predict_parallel.predict(trunks_, leafs_, compares_, X_pred, predictions, num_threads=num_threads)
    # Single-argument print() emits the same output on Python 2 and 3.
    print(predictions)
    return predictions
Esempio n. 3
0
def predict(dir, X_pred):
    """Run a random subset of the joblib-stored forest in ``dir`` on ``X_pred``.

    The tree file names come from ``dir + "/tree.list"`` (one per line).  The
    list is shuffled and truncated to the fraction given by the module-level
    ``treefraction``; each selected tree is loaded from ``dir``, its arrays
    are padded to a common shape, and everything is passed to
    ``_predict_parallel.predict`` along with a zero-initialised output
    buffer that the routine is expected to fill in place.

    Args:
        dir: Directory holding ``tree.list`` and the tree files.
        X_pred: Samples to predict; converted with ``np.asarray``.

    Returns:
        The ``predictions`` array of length ``len(X_pred)``.

    Raises:
        IOError: If ``dir + "/tree.list"`` does not exist.
    """
    tree_list = dir + "/tree.list"
    if not os.path.exists(tree_list):
        raise IOError("Directory {:s} does not contain proper random forest!\n".format(dir))

    # Use a context manager so the list file is closed deterministically
    # instead of being left to the garbage collector.
    with open(tree_list) as handle:
        trees = handle.read().strip().split("\n")
    random.shuffle(trees)
    trees = trees[: int(len(trees) * treefraction)]
    predictions = np.zeros(len(X_pred))
    X_pred = np.asarray(X_pred)

    trunks = []
    compares = []
    leafs = []

    for listed in trees:
        # Keep only the file name so the tree is always read from `dir`.
        # NOTE: joblib.load unpickles its input -- only load trusted forests.
        parts = joblib.load(dir + "/" + listed.split("/")[-1])[:5]
        trunks.append(np.vstack((parts[0], parts[2:4])))
        compares.append(parts[1])
        leaf = parts[4][:, 0, :]
        # Share of column 1 in each leaf's row total.
        # NOTE(review): presumably `leaf` is a float array; with integer
        # dtype on Python 2 this `/` would truncate -- confirm.
        leafs.append(leaf[:, 1] / leaf.sum(axis=1))

    # Pad the ragged per-tree arrays into rectangular blocks.
    # NOTE(review): NaN has no int64 representation, so the padding value
    # produced by this fill is platform dependent; presumably the padded
    # cells are never read by the kernel -- confirm.
    shape = (len(trunks), max(t.shape[0] for t in trunks), max(t.shape[1] for t in trunks))
    trunks_ = np.full(shape, np.nan, dtype=np.int64)
    for i, trunk in enumerate(trunks):
        trunks_[i, : trunk.shape[0], : trunk.shape[1]] = trunk

    leafs_ = np.full((len(leafs), max(l.shape[0] for l in leafs)), np.nan, dtype=np.float64)
    for i, leaf_values in enumerate(leafs):
        leafs_[i, : leaf_values.shape[0]] = leaf_values

    compares_ = np.full((len(compares), max(c.shape[0] for c in compares)), np.nan, dtype=np.float64)
    for i, compare in enumerate(compares):
        compares_[i, : compare.shape[0]] = compare

    # Free the per-tree lists before the prediction call to lower peak memory.
    del leafs, trunks, compares

    _predict_parallel.predict(trunks_, leafs_, compares_, X_pred, predictions, num_threads=num_threads)
    # Single-argument print() emits the same output on Python 2 and 3.
    print(predictions)
    return predictions
Esempio n. 4
0
def predict(dir, X_pred):
    """Run a random subset of the HDF5-stored forest ``dir + '.hdf5'`` on ``X_pred``.

    The top-level keys of the file are treated as trees.  They are shuffled
    and truncated to the fraction given by the module-level ``treefraction``;
    for each selected tree the ``trunks``, ``compares`` and ``leafs``
    datasets are read, padded to a common shape, and passed to
    ``_predict_parallel.predict`` along with a zero-initialised output
    buffer that the routine is expected to fill in place.

    Args:
        dir: Path prefix of the forest; ``'.hdf5'`` is appended to it.
        X_pred: Samples to predict; converted with ``np.asarray``.

    Returns:
        The ``predictions`` array of length ``len(X_pred)``.

    Raises:
        IOError: If ``dir + '.hdf5'`` does not exist.
    """
    hdf5_path = dir + '.hdf5'
    if not os.path.exists(hdf5_path):
        raise IOError('Directory {:s} does not contain proper random forest!\n'.format(dir))

    predictions = np.zeros(len(X_pred))
    X_pred = np.asarray(X_pred)

    trunks = []
    compares = []
    leafs = []

    with h5py.File(hdf5_path, "r") as h5f:
        # On Python 3, keys() is a view that random.shuffle cannot reorder;
        # copy it into a list first (a no-op change on Python 2).
        trees = list(h5f.keys())
        random.shuffle(trees)
        trees = trees[:int(len(trees) * treefraction)]

        for tree_key in trees:
            # [()] reads each dataset fully into memory before the file closes.
            trunks.append(h5f[tree_key + '/trunks'][()])
            compares.append(h5f[tree_key + '/compares'][()])
            leafs.append(h5f[tree_key + '/leafs'][()])

    # Pad the ragged per-tree arrays into rectangular blocks.
    # NOTE(review): NaN has no int64 representation, so the padding value
    # produced by this fill is platform dependent; presumably the padded
    # cells are never read by the kernel -- confirm.
    shape = (len(trunks), max(t.shape[0] for t in trunks), max(t.shape[1] for t in trunks))
    trunks_ = np.full(shape, np.nan, dtype=np.int64)
    for i, trunk in enumerate(trunks):
        trunks_[i, :trunk.shape[0], :trunk.shape[1]] = trunk

    leafs_ = np.full((len(leafs), max(l.shape[0] for l in leafs)), np.nan, dtype=np.float64)
    for i, leaf in enumerate(leafs):
        leafs_[i, :leaf.shape[0]] = leaf

    compares_ = np.full((len(compares), max(c.shape[0] for c in compares)), np.nan, dtype=np.float64)
    for i, compare in enumerate(compares):
        compares_[i, :compare.shape[0]] = compare

    # Free the per-tree lists before the prediction call to lower peak memory.
    del leafs, trunks, compares

    _predict_parallel.predict(trunks_, leafs_, compares_, X_pred, predictions, num_threads=num_threads)
    # Single-argument print() emits the same output on Python 2 and 3.
    print(predictions)
    return predictions