def helper_display_dataset(bunch):
    """Helper for display_dataset visualization.

    Builds one table row per instance: the feature values in column order,
    followed by the target value. Values of features/targets that
    ``dataset.is_feature_nominal`` / ``dataset.is_target_nominal`` report as
    nominal are shown as ``"<name> [<code>]"``; NaN entries stay ``np.nan``.

    :param bunch: dict-like dataset with "data" (must expose a two-element
        ``.shape``) and "target" entries; "feature_names" is optional.
        ``feature_value_names`` / ``target_names`` are read only for nominal
        features / targets. (Presumably a scikit-learn Bunch -- confirm.)
    :return: a dict with keys 'attrs' (column names), 'metas' (empty string),
        'data_new' (list of rows) and 'class_var' (always 'class')
    """
    data = bunch["data"]
    target = bunch["target"]
    nrows, ncols = data.shape

    # One (initially empty) output row per instance.
    csv = [[] for _ in range(nrows)]

    # Features:
    for j in range(ncols):
        if dataset.is_feature_nominal(bunch, j):  # nominal feature
            for i in range(nrows):
                if np.isnan(data[i][j]):
                    csv[i].append(np.nan)
                else:
                    # The stored number is an index into the value names.
                    val = int(data[i][j])
                    csv[i].append(
                        bunch.feature_value_names[j][val] + (" [%d]" % val))
        else:
            for i in range(nrows):
                csv[i].append(data[i][j])

    # Target:
    if dataset.is_target_nominal(bunch):  # nominal target
        for i in range(nrows):
            if np.isnan(target[i]):
                csv[i].append(np.nan)
            else:
                val = int(target[i])
                csv[i].append(bunch.target_names[val] + (" [%d]" % val))
    else:
        for i in range(nrows):
            csv[i].append(target[i])

    # `in` instead of dict.has_key(): has_key() does not exist on Python 3.
    if "feature_names" in bunch:
        attrs = bunch.feature_names
    else:
        # Use ncols rather than len(data[0]) so a dataset with zero rows
        # does not raise IndexError.
        attrs = ["attribute" + str(k) for k in range(ncols)]

    return {
        'attrs': attrs,
        'metas': '',
        'data_new': csv,
        'class_var': 'class',
    }
def helper_display_dataset(bunch):
    """Helper for display_dataset visualization.

    Converts the dataset into a list of rows (feature cells followed by the
    target cell). Cells of nominal features/targets -- as decided by
    ``dataset.is_feature_nominal`` / ``dataset.is_target_nominal`` -- are
    rendered as ``"<name> [<code>]"``; NaN cells are kept as ``np.nan``.

    :param bunch: dict-like dataset providing "data" (with a two-element
        ``.shape``) and "target"; "feature_names" is optional, while
        ``feature_value_names`` / ``target_names`` are only read for nominal
        features / targets. (Presumably a scikit-learn Bunch -- confirm.)
    :return: a dict with keys 'attrs', 'metas', 'data_new' and 'class_var'
    """
    data = bunch["data"]
    target = bunch["target"]
    nrows, ncols = data.shape

    # One output row per instance, filled column by column below.
    table = [[] for _ in range(nrows)]

    # Features:
    for col in range(ncols):
        nominal = dataset.is_feature_nominal(bunch, col)
        for row in range(nrows):
            cell = data[row][col]
            if not nominal:
                table[row].append(cell)
            elif np.isnan(cell):
                table[row].append(np.nan)
            else:
                # The stored number indexes into this feature's value names.
                code = int(cell)
                table[row].append(
                    bunch.feature_value_names[col][code] + (" [%d]" % code))

    # Target:
    nominal_target = dataset.is_target_nominal(bunch)
    for row in range(nrows):
        cell = target[row]
        if not nominal_target:
            table[row].append(cell)
        elif np.isnan(cell):
            table[row].append(np.nan)
        else:
            code = int(cell)
            table[row].append(bunch.target_names[code] + (" [%d]" % code))

    # Membership test replaces dict.has_key(), which Python 3 removed.
    if "feature_names" in bunch:
        attrs = bunch.feature_names
    else:
        # ncols comes from data.shape, so this also works for zero rows
        # (len(data[0]) would raise IndexError there).
        attrs = ["attribute"+str(k) for k in range(ncols)]

    return {'attrs':attrs, 'metas':'', 'data_new':table, 'class_var':'class'}
def split_dataset_randomly(input_dict):
    """Randomly splits a given dataset into a train and test dataset.

    The split uses a fixed ``random_state=1``, so repeated calls on the same
    input produce the same partition.

    :param input_dict: dict with "data" (dict-like dataset with "data" and
        "target" entries plus ``feature_names`` and ``DESCR`` attributes;
        ``target_names`` is read when ``dataset.is_target_nominal`` is true)
        and "p" (value convertible to float; ``1 - p`` is passed on as the
        test size)
    :return: dict with 'train_data' and 'test_data' Bunch objects
    """
    inst = input_dict['data']
    test_size = 1 - float(input_dict["p"])

    # sklearn.cross_validation was removed from newer scikit-learn releases;
    # prefer the current module and fall back for old installations.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split

    data_train, data_test, target_train, target_test = train_test_split(
        inst['data'], inst['target'], test_size=test_size, random_state=1)

    # Same story for Bunch, which used to live in sklearn.datasets.base.
    try:
        from sklearn.utils import Bunch
    except ImportError:
        from sklearn.datasets.base import Bunch

    # Metadata copied unchanged into both halves of the split.
    shared = {'feature_names': inst.feature_names, 'DESCR': inst.DESCR}
    if dataset.is_target_nominal(inst):
        shared['target_names'] = inst.target_names
    # `in` instead of dict.has_key(): has_key() does not exist on Python 3.
    if "feature_value_names" in inst:
        shared['feature_value_names'] = inst.feature_value_names

    a_train = Bunch(data=data_train, target=target_train, **shared)
    a_test = Bunch(data=data_test, target=target_test, **shared)

    return {'train_data': a_train, 'test_data': a_test}
def select_data(request, input_dict, output_dict, widget):
    """Render the select_data interaction page for a dataset.

    Describes every feature listed in ``bunch.feature_names`` and the target
    (under the key "class") as either "Discrete", with its list of values, or
    "Continuous", with an empty list, and hands that description to the
    template.

    :param request: request object forwarded to ``render``
    :param input_dict: dict whose "data" entry is the dict-like dataset
    :param output_dict: not used by this function
    :param widget: passed to the template as "widget"
    :return: whatever ``render`` returns for "interactions/select_data.html"
    """
    bunch = input_dict["data"]
    attrs = {}

    # Membership tests replace dict.has_key(), which Python 3 removed.
    has_value_names = "feature_value_names" in bunch
    if "feature_names" in bunch:
        for i, f in enumerate(bunch.feature_names):
            # len() rather than truthiness: the value names may be an
            # array-like whose truth value is ambiguous (assumption).
            if has_value_names and len(bunch.feature_value_names[i]) > 0:
                vals = [str(v) for v in bunch.feature_value_names[i]]
                attrs[f] = {"values": vals, "type": "Discrete", "feature": 1}
            else:
                attrs[f] = {"values": [], "type": "Continuous", "feature": 1}

    # Target:
    if dataset.is_target_nominal(bunch):  # nominal target
        attrs["class"] = {"values": bunch.target_names, "type": "Discrete", "feature": 0}
    else:
        attrs["class"] = {"values": [], "type": "Continuous", "feature": 0}

    # Do not sort the features. list() makes this a real list on Python 3,
    # where dict.items() returns a view.
    attrs_as_list = list(attrs.items())

    return render(request, "interactions/select_data.html",
                  {"widget": widget, "attrs": attrs_as_list})
def select_data(request, input_dict, output_dict, widget):
    """Render the select_data interaction page for a dataset.

    Builds a mapping from attribute name to a description dict with the keys
    'values', 'type' ('Discrete' or 'Continuous') and 'feature' (1 for
    features, 0 for the target, which is stored under the name 'class'), and
    passes its items to the template.

    :param request: request object forwarded to ``render``
    :param input_dict: dict whose 'data' entry is the dict-like dataset
    :param output_dict: not used by this function
    :param widget: passed to the template as 'widget'
    :return: whatever ``render`` returns for 'interactions/select_data.html'
    """
    bunch = input_dict['data']
    attrs = {}

    # `in` instead of dict.has_key(): has_key() does not exist on Python 3.
    if 'feature_names' in bunch:
        for idx, name in enumerate(bunch.feature_names):
            # A feature counts as discrete only when value names exist for it
            # and that list is non-empty.
            discrete = ('feature_value_names' in bunch
                        and len(bunch.feature_value_names[idx]) > 0)
            if discrete:
                vals = [str(v) for v in bunch.feature_value_names[idx]]
                attrs[name] = {'values': vals, 'type': 'Discrete', 'feature': 1}
            else:
                attrs[name] = {'values': [], 'type': 'Continuous', 'feature': 1}

    # Target:
    if dataset.is_target_nominal(bunch):  # nominal target
        attrs['class'] = {'values': bunch.target_names, 'type': 'Discrete', 'feature': 0}
    else:
        attrs['class'] = {'values': [], 'type': 'Continuous', 'feature': 0}

    # Do not sort the features; materialise the items so the template gets an
    # actual list on Python 3 as well (items() is a view there).
    attrs_as_list = list(attrs.items())

    return render(request, 'interactions/select_data.html',
                  {'widget' : widget, 'attrs' : attrs_as_list})