Ejemplo n.º 1
0
def helper_display_dataset(bunch):
    """Helper for display_dataset visualization.

    Builds one display row per instance: the feature values followed by the
    target value. Nominal values are rendered as ``"<name> [<index>]"``;
    missing (NaN) nominal values are kept as ``np.nan``; all other values are
    passed through unchanged.

    :param bunch: mapping with a 2-D ``data`` array and a ``target`` sequence.
        ``feature_value_names`` / ``target_names`` are read for nominal
        columns, and ``feature_names`` is used when present.
    :return: a dict with keys ``attrs`` (attribute names), ``metas``
        (always an empty string), ``data_new`` (list of rows) and
        ``class_var`` (always ``'class'``).
    """

    data = bunch["data"]
    target = bunch["target"]

    nrows, ncols = data.shape
    rows = [[] for _ in range(nrows)]

    # Features: filled column by column so the nominal check runs once per
    # column rather than once per cell.
    for j in range(ncols):
        if dataset.is_feature_nominal(bunch, j):
            for i in range(nrows):
                if np.isnan(data[i][j]):
                    rows[i].append(np.nan)
                else:
                    val = int(data[i][j])
                    rows[i].append(bunch.feature_value_names[j][val] +
                                   (" [%d]" % val))
        else:
            for i in range(nrows):
                rows[i].append(data[i][j])

    # Target: appended as the last cell of every row.
    if dataset.is_target_nominal(bunch):
        for i in range(nrows):
            if np.isnan(target[i]):
                rows[i].append(np.nan)
            else:
                val = int(target[i])
                rows[i].append(bunch.target_names[val] + (" [%d]" % val))
    else:
        for i in range(nrows):
            rows[i].append(target[i])

    # ``in`` instead of ``has_key``: the latter no longer exists on Python 3.
    if "feature_names" in bunch:
        attrs = bunch.feature_names
    else:
        # Use ncols rather than len(data[0]) so an empty dataset (zero rows)
        # does not raise IndexError.
        attrs = ["attribute" + str(k) for k in range(ncols)]

    return {
        'attrs': attrs,
        'metas': '',
        'data_new': rows,
        'class_var': 'class'
    }
Ejemplo n.º 2
0
def helper_display_dataset(bunch):
    """Helper for display_dataset visualization.

    Builds one display row per instance: the feature values followed by the
    target value. Nominal values are rendered as ``"<name> [<index>]"``;
    missing (NaN) nominal values are kept as ``np.nan``; all other values are
    passed through unchanged.

    :param bunch: mapping with a 2-D ``data`` array and a ``target`` sequence.
        ``feature_value_names`` / ``target_names`` are read for nominal
        columns, and ``feature_names`` is used when present.
    :return: a dict with keys ``attrs`` (attribute names), ``metas``
        (always an empty string), ``data_new`` (list of rows) and
        ``class_var`` (always ``'class'``).
    """

    data = bunch["data"]
    target = bunch["target"]

    nrows, ncols = data.shape
    rows = [[] for _ in range(nrows)]

    # Features: filled column by column so the nominal check runs once per
    # column rather than once per cell.
    for j in range(ncols):
        if dataset.is_feature_nominal(bunch, j):
            for i in range(nrows):
                if np.isnan(data[i][j]):
                    rows[i].append(np.nan)
                else:
                    val = int(data[i][j])
                    rows[i].append(bunch.feature_value_names[j][val] + (" [%d]" % val))
        else:
            for i in range(nrows):
                rows[i].append(data[i][j])

    # Target: appended as the last cell of every row.
    if dataset.is_target_nominal(bunch):
        for i in range(nrows):
            if np.isnan(target[i]):
                rows[i].append(np.nan)
            else:
                val = int(target[i])
                rows[i].append(bunch.target_names[val] + (" [%d]" % val))
    else:
        for i in range(nrows):
            rows[i].append(target[i])

    # ``in`` instead of ``has_key``: the latter no longer exists on Python 3.
    if "feature_names" in bunch:
        attrs = bunch.feature_names
    else:
        # Use ncols rather than len(data[0]) so an empty dataset (zero rows)
        # does not raise IndexError.
        attrs = ["attribute" + str(k) for k in range(ncols)]

    return {'attrs': attrs, 'metas': '', 'data_new': rows, 'class_var': 'class'}
Ejemplo n.º 3
0
def split_dataset_randomly(input_dict):
    """Randomly split a given dataset into a train and a test dataset.

    :param input_dict: dict with ``'data'`` (the dataset bunch to split) and
        ``'p'`` (value convertible to float; the test share is ``1 - p``).
    :return: a dict with keys ``'train_data'`` and ``'test_data'``, each a
        ``Bunch`` carrying its share of ``data``/``target`` plus the
        ``feature_names`` and ``DESCR`` of the input (and ``target_names`` /
        ``feature_value_names`` when applicable).
    """

    inst = input_dict['data']
    test_size = 1 - float(input_dict["p"])

    # Newer scikit-learn releases moved these names; try the current
    # location first and fall back to the legacy module paths.
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    try:
        from sklearn.utils import Bunch
    except ImportError:
        from sklearn.datasets.base import Bunch

    # random_state is fixed, so the same input yields the same split.
    data_train, data_test, target_train, target_test = train_test_split(
            inst['data'],
            inst['target'],
            test_size=test_size,
            random_state=1)

    # Metadata that both resulting bunches share with the input dataset.
    shared = {'feature_names': inst.feature_names, 'DESCR': inst.DESCR}
    if dataset.is_target_nominal(inst):
        shared['target_names'] = inst.target_names
    # ``in`` instead of ``has_key``: the latter no longer exists on Python 3.
    if "feature_value_names" in inst:
        shared['feature_value_names'] = inst.feature_value_names

    a_train = Bunch(data=data_train, target=target_train, **shared)
    a_test = Bunch(data=data_test, target=target_test, **shared)

    return {'train_data': a_train, 'test_data': a_test}
Ejemplo n.º 4
0
def select_data(request, input_dict, output_dict, widget):
    """Render the data-selection interaction for the dataset in ``input_dict``.

    Describes every named feature, plus the target under the key ``"class"``,
    as a dict with ``"values"``, ``"type"`` (``"Discrete"`` or
    ``"Continuous"``) and ``"feature"`` (1 for features, 0 for the target),
    and passes the resulting (name, description) pairs to the template.

    :param request: request object forwarded to ``render``.
    :param input_dict: dict whose ``"data"`` entry is the dataset bunch.
    :param output_dict: not used by this function.
    :param widget: passed through to the template context.
    :return: the result of ``render`` for ``interactions/select_data.html``.
    """
    bunch = input_dict["data"]

    # ``in`` instead of ``has_key``: the latter no longer exists on Python 3.
    has_value_names = "feature_value_names" in bunch

    attrs = {}
    if "feature_names" in bunch:
        for i, f in enumerate(bunch.feature_names):
            # A feature is discrete only when it has a non-empty list of
            # value names.
            if has_value_names and len(bunch.feature_value_names[i]) > 0:
                vals = [str(v) for v in bunch.feature_value_names[i]]
                attrs[f] = {"values": vals, "type": "Discrete", "feature": 1}
            else:
                attrs[f] = {"values": [], "type": "Continuous", "feature": 1}

    # Target:
    if dataset.is_target_nominal(bunch):
        # nominal target
        attrs["class"] = {"values": bunch.target_names, "type": "Discrete", "feature": 0}
    else:
        attrs["class"] = {"values": [], "type": "Continuous", "feature": 0}

    # Materialize the pairs so the template receives a real list on
    # Python 3 as well; the features are deliberately left unsorted.
    attrs_as_list = list(attrs.items())

    return render(request, "interactions/select_data.html", {"widget": widget, "attrs": attrs_as_list})
Ejemplo n.º 5
0
def select_data(request, input_dict, output_dict, widget):
    """Render the data-selection interaction for the dataset in ``input_dict``.

    Describes every named feature, plus the target under the key ``'class'``,
    as a dict with ``'values'``, ``'type'`` (``'Discrete'`` or
    ``'Continuous'``) and ``'feature'`` (1 for features, 0 for the target),
    and passes the resulting (name, description) pairs to the template.

    :param request: request object forwarded to ``render``.
    :param input_dict: dict whose ``'data'`` entry is the dataset bunch.
    :param output_dict: not used by this function.
    :param widget: passed through to the template context.
    :return: the result of ``render`` for ``interactions/select_data.html``.
    """
    bunch = input_dict['data']

    # ``in`` instead of ``has_key``: the latter no longer exists on Python 3.
    has_value_names = 'feature_value_names' in bunch

    attrs = {}
    if 'feature_names' in bunch:
        for i, f in enumerate(bunch.feature_names):
            # A feature is discrete only when it has a non-empty list of
            # value names.
            if has_value_names and len(bunch.feature_value_names[i]) > 0:
                vals = [str(v) for v in bunch.feature_value_names[i]]
                attrs[f] = {'values': vals, 'type': 'Discrete', 'feature': 1}
            else:
                attrs[f] = {'values': [], 'type': 'Continuous', 'feature': 1}

    # Target:
    if dataset.is_target_nominal(bunch):
        # nominal target
        attrs['class'] = {'values': bunch.target_names, 'type': 'Discrete', 'feature': 0}
    else:
        attrs['class'] = {'values': [], 'type': 'Continuous', 'feature': 0}

    # Materialize the pairs so the template receives a real list on
    # Python 3 as well; the features are deliberately left unsorted.
    attrs_as_list = list(attrs.items())

    return render(request, 'interactions/select_data.html',
                  {'widget': widget, 'attrs': attrs_as_list})