예제 #1
0
def dt_predict(input_dict):
    """Predict with a fitted distributed decision-tree forest.

    Args:
        input_dict: mapping that must provide the keys ``"dataset"`` (passed
            as the first positional argument to ``predict``) and
            ``"fitmodel_url"`` (passed as the ``fitmodel_url`` keyword).

    Returns:
        A dict with the single key ``"string"`` holding whatever
        ``forest_distributed_decision_trees.predict`` returned.

    Raises:
        KeyError: if either required key is missing from ``input_dict``.
    """
    # Function-scope import: discomll is loaded when the function is called.
    from discomll.ensemble import forest_distributed_decision_trees as forest

    data = input_dict["dataset"]
    model_location = input_dict["fitmodel_url"]

    # save_results=True is always requested by this wrapper.
    result = forest.predict(data,
                            fitmodel_url=model_location,
                            save_results=True)
    return {"string": result}
예제 #2
0
def dt_predict(input_dict):
    """Run forest prediction for the dataset described by ``input_dict``.

    ``input_dict["dataset"]`` and ``input_dict["fitmodel_url"]`` are handed to
    ``forest_distributed_decision_trees.predict`` together with
    ``save_results=True``; the value it returns is wrapped in a dict under the
    key ``"string"``. A missing key raises ``KeyError``.
    """
    from discomll.ensemble import forest_distributed_decision_trees

    predict = forest_distributed_decision_trees.predict
    return {
        "string": predict(
            input_dict["dataset"],
            fitmodel_url=input_dict["fitmodel_url"],
            save_results=True,
        )
    }
예제 #3
0
from discomll import dataset
from discomll.ensemble import forest_distributed_decision_trees

# Segmentation example: fit a forest on the training chunks, then run
# prediction on the test chunks and print what ``predict`` returns.
#
# Both datasets use the same column layout: column 0 is passed as id_index,
# column 1 as y_index and columns 2..20 as X_indices.
# NOTE(review): with generate_urls=True the two URLs are presumably the first
# and last chunk of a generated range -- confirm against the discomll docs.
train = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/segmentation/train/xaaaaa.gz",
        "http://ropot.ijs.si/data/segmentation/train/xaaabj.gz",
    ],
    data_type="gzip",
    generate_urls=True,
    X_indices=range(2, 21),
    id_index=0,
    y_index=1,
    # One "c" entry per feature column (19 in total).
    X_meta=["c" for i in range(2, 21)],
    delimiter=",",
)

test = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/segmentation/test/xaaaaa.gz",
        "http://ropot.ijs.si/data/segmentation/test/xaaabj.gz",
    ],
    data_type="gzip",
    generate_urls=True,
    X_indices=range(2, 21),
    id_index=0,
    y_index=1,
    X_meta=["c" for i in range(2, 21)],
    delimiter=",",
)

# Hyper-parameters for the fit, kept together so they are easy to scan.
forest_settings = dict(
    trees_per_chunk=1,
    bootstrap=True,
    max_tree_nodes=50,
    min_samples_leaf=10,
    min_samples_split=5,
    class_majority=1,
    separate_max=True,
    measure="info_gain",
    accuracy=1,
    random_state=None,
    save_results=True,
)
fit_model = forest_distributed_decision_trees.fit(train, **forest_settings)

predict_url = forest_distributed_decision_trees.predict(test, fit_model)
# Single-argument parenthesised form: same output as the print statement.
print(predict_url)
예제 #4
0
                     y_index=1,
                     X_meta=["c" for i in range(2, 21)],
                     delimiter=",")

# Describe the test data with the same column layout that the keyword
# arguments below spell out: ids in column 0, target in column 1 and the
# features in columns 2..20.
test_chunks = [
    "http://ropot.ijs.si/data/segmentation/test/xaaaaa.gz",
    "http://ropot.ijs.si/data/segmentation/test/xaaabj.gz",
]
test = dataset.Data(data_tag=test_chunks, data_type="gzip", generate_urls=True,
                    X_indices=range(2, 21), id_index=0, y_index=1,
                    X_meta=["c" for i in range(2, 21)], delimiter=",")

# Fit the forest on ``train`` (built earlier in this example).
fit_model = forest_distributed_decision_trees.fit(
    train,
    trees_per_chunk=1, bootstrap=True, max_tree_nodes=50,
    min_samples_leaf=10, min_samples_split=5, class_majority=1,
    separate_max=True, measure="info_gain", accuracy=1,
    random_state=None, save_results=True,
)

# Predict the test data with the fitted model and show the returned value.
predict_url = forest_distributed_decision_trees.predict(test, fit_model)
# Single-argument parenthesised form: same output as the print statement.
print(predict_url)
예제 #5
0
from disco.core import result_iterator

from discomll import dataset
from discomll.ensemble import forest_distributed_decision_trees
from discomll.utils import model_view

# Iris example: fit a forest on the UCI iris data, print the fitted model and
# then predict the very dataset the model was trained on.

# data_tag is a list holding a single one-element list with the data URL.
iris_source = [["http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"]]
train = dataset.Data(
    data_tag=iris_source,
    X_indices=xrange(0, 4),  # columns 0..3 are the features
    X_meta="http://ropot.ijs.si/data/datasets_meta/iris_meta.csv",
    y_index=4,               # column 4 is the target
    delimiter=",",
)

# Hyper-parameters for the fit; bootstrap is disabled in this example.
fit_options = {
    "trees_per_chunk": 1,
    "bootstrap": False,
    "max_tree_nodes": 50,
    "min_samples_leaf": 2,
    "min_samples_split": 1,
    "class_majority": 1,
    "separate_max": True,
    "measure": "info_gain",
    "accuracy": 1,
    "random_state": None,
    "save_results": True,
}
fit_model = forest_distributed_decision_trees.fit(train, **fit_options)

# Show the textual view of the fitted model.
print(model_view.output_model(fit_model))

# Predict the training dataset.
predictions = forest_distributed_decision_trees.predict(train, fit_model)

# Output results: each key followed by the first element of its value.
for key, value in result_iterator(predictions):
    print("%s %s" % (key, value[0]))