def model_view(request, input_dict, output_dict, widget):
    """Store the text form of a fitted discomll model in a file and render it.

    The model referenced by ``input_dict["fitmodel_url"]`` is converted to
    text with ``discomll.utils.model_view.output_model`` and written below
    ``MEDIA_ROOT/discomll_models/``. When the target file already exists it
    is reused and the model is not generated again.

    Args:
        request: request object handed through to ``render``.
        input_dict: widget inputs; must contain ``"fitmodel_url"``, a mapping
            whose first value is indexable and whose first element is a
            string (presumably the model's tag -- verify against caller).
        output_dict: widget outputs; ``output_dict['filename']`` is set to
            the model file's path relative to ``MEDIA_ROOT``.
        widget: widget object placed into the template context.

    Returns:
        Whatever ``render`` returns for
        ``visualizations/string_to_file.html``.
    """
    import os.path

    # Aliased so the import does not shadow this view's own name.
    from discomll.utils import model_view as discomll_model_view
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_models'
    fitmodel_url = input_dict["fitmodel_url"]
    # list() keeps the lookup valid when values() returns a view, not a list.
    tag = list(fitmodel_url.values())[0]

    # The first six characters of the tag are dropped to build the file name
    # (presumably a "tag://" scheme prefix -- TODO confirm).
    filename = folder + "/" + tag[0][6:] + '.txt'
    destination = MEDIA_ROOT + '/' + filename
    ensure_dir(destination)

    if not os.path.isfile(destination):
        # Build the text before opening the file so that a failure in
        # output_model does not leave an empty file behind.
        model = discomll_model_view.output_model(fitmodel_url)
        # The context manager closes the handle even when write() raises.
        with open(destination, 'w') as model_file:
            model_file.write(model)

    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict,
    })
def model_view(request, input_dict, output_dict, widget):
    """Store the text form of a fitted discomll model in a file and render it.

    The model referenced by ``input_dict["fitmodel_url"]`` is converted to
    text with ``discomll.utils.model_view.output_model`` and written below
    ``MEDIA_ROOT/discomll_models/``. When the target file already exists it
    is reused and the model is not generated again.

    Args:
        request: request object handed through to ``render``.
        input_dict: widget inputs; must contain ``"fitmodel_url"``, a mapping
            whose first value is indexable and whose first element is a
            string (presumably the model's tag -- verify against caller).
        output_dict: widget outputs; ``output_dict['filename']`` is set to
            the model file's path relative to ``MEDIA_ROOT``.
        widget: widget object placed into the template context.

    Returns:
        Whatever ``render`` returns for
        ``visualizations/string_to_file.html``.
    """
    import os.path

    # Aliased so the import does not shadow this view's own name.
    from discomll.utils import model_view as discomll_model_view
    from mothra.settings import MEDIA_ROOT
    from workflows.helpers import ensure_dir

    folder = 'discomll_models'
    fitmodel_url = input_dict["fitmodel_url"]
    # list() keeps the lookup valid when values() returns a view, not a list.
    tag = list(fitmodel_url.values())[0]

    # The first six characters of the tag are dropped to build the file name
    # (presumably a "tag://" scheme prefix -- TODO confirm).
    filename = folder + "/" + tag[0][6:] + '.txt'
    destination = MEDIA_ROOT + '/' + filename
    ensure_dir(destination)

    if not os.path.isfile(destination):
        # Build the text before opening the file so that a failure in
        # output_model does not leave an empty file behind.
        model = discomll_model_view.output_model(fitmodel_url)
        # The context manager closes the handle even when write() raises.
        with open(destination, 'w') as model_file:
            model_file.write(model)

    output_dict['filename'] = filename
    return render(request, 'visualizations/string_to_file.html', {
        'widget': widget,
        'input_dict': input_dict,
        'output_dict': output_dict,
    })
from discomll.utils import model_view # define training dataset train = dataset.Data(data_tag=["test:breast_cancer_cont"], data_type="chunk", # define data source - chunk data on ddfs X_indices=xrange(0, 9), # define attribute indices y_index=9, # define class index delimiter=",") # define test dataset test = dataset.Data(data_tag=["test:breast_cancer_cont_test"], data_type="chunk", # define data source - chunk data on ddfs X_indices=xrange(0, 9), # define attribute indices y_index=9, # define class index delimiter=",") # fit model on training dataset fit_model = kmeans.fit(train, n_clusters=2, max_iterations=5, random_state=0) # output model model = model_view.output_model(fit_model) print model # predict test dataset predictions = kmeans.predict(test, fit_model) # output results for k, v in result_iterator(predictions): print k, v
from discomll import dataset from discomll.classification import logistic_regression from discomll.utils import model_view # define training dataset train = dataset.Data(data_tag=["test:ex4"], data_type="chunk", X_indices=xrange(0, 2), y_index=2, y_map=["0.0000000e+00", "1.0000000e+00"]) # fit model on training dataset fit_model = logistic_regression.fit(train) # output model model = model_view.output_model(fit_model) print model
from disco.core import result_iterator from discomll import dataset from discomll.ensemble import distributed_random_forest from discomll.utils import model_view from discomll.utils import accuracy train = dataset.Data(data_tag=[ ["http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"]], id_index=0, X_indices=xrange(1, 10), X_meta="http://ropot.ijs.si/data/datasets_meta/breastcancer_meta.csv", y_index=10, delimiter=",") fit_model = distributed_random_forest.fit(train, trees_per_chunk=3, max_tree_nodes=50, min_samples_leaf=10, min_samples_split=5, class_majority=1, measure="info_gain", accuracy=1, separate_max=True, random_state=None, save_results=True) print model_view.output_model(fit_model) # predict training dataset predictions = distributed_random_forest.predict(train, fit_model) # output results for k, v in result_iterator(predictions): print k, v # measure accuracy ca = accuracy.measure(train, predictions) print ca
from disco.core import result_iterator from discomll import dataset from discomll.ensemble import forest_distributed_decision_trees from discomll.utils import model_view train = dataset.Data(data_tag=[["http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"]], X_indices=xrange(0, 4), X_meta="http://ropot.ijs.si/data/datasets_meta/iris_meta.csv", y_index=4, delimiter=",") fit_model = forest_distributed_decision_trees.fit(train, trees_per_chunk=1, bootstrap=False, max_tree_nodes=50, min_samples_leaf=2, min_samples_split=1, class_majority=1, separate_max=True, measure="info_gain", accuracy=1, random_state=None, save_results=True) print model_view.output_model(fit_model) # predict training dataset predictions = forest_distributed_decision_trees.predict(train, fit_model) # output results for k, v in result_iterator(predictions): print k, v[0]