Ejemplo n.º 1
0
def process_wine(filename):
    """Load a wine data file and turn it into a cleaned numeric array.

    Steps, in order:
      1. Read ``filename`` with ``numpy.genfromtxt`` (``;``-delimited,
         first line skipped).
      2. Drop every row that contains a NaN.
      3. Binarize the last column: values above 5 become 1, all others 0.
      4. For columns 3, 4 and 9, remove the rows reported by
         ``Transformations.find_outliers``.
      5. Pass every column (the binarized last one included) through
         ``normalize``.

    Args:
        filename: Input source, handed straight to ``numpy.genfromtxt``.

    Returns:
        The processed 2-D numpy array.
    """
    # skip_header is a line count; 1 skips the header row.
    red_wine = numpy.genfromtxt(filename, delimiter=";", skip_header=1)

    # deletes rows containing NaN values
    red_wine = red_wine[~numpy.isnan(red_wine).any(axis=1)]

    # classifies wines into 1 and 0 (last column > 5 -> 1, otherwise 0)
    red_wine[:, -1] = numpy.where(red_wine[:, -1] > 5, 1, 0)

    # removes outliers from selected columns
    # NOTE(review): find_outliers is assumed to return row indices into the
    # column it is given (second argument 1.8 is passed through unchanged)
    # -- confirm against Transformations.
    list_del = {3, 4, 9}
    for i in list_del:
        li = Transformations.find_outliers(red_wine[:, i], 1.8)
        outlier_rows = numpy.asarray(list(li), dtype=int)
        if outlier_rows.size:
            # Delete all flagged rows in ONE call. Deleting them one by one
            # shifts the remaining rows up after each removal, so every
            # later index would hit the wrong row (or fall out of bounds).
            red_wine = numpy.delete(red_wine, outlier_rows, axis=0)

    # adds features (currently disabled)
    # y = red_wine[:, -1]
    # x = red_wine[:, 0:-1]
    # x = Transformations.add_feature(x, [1, 4])
    # red_wine = numpy.concatenate((x, y.reshape(y.shape[0], 1)), axis=1)

    # selects features (currently disabled)
    # red_wine = Transformations.select_feature(red_wine, [8, 3])

    # normalizes each column
    # NOTE(review): this loop also covers the last (0/1 label) column --
    # confirm that normalizing the label is intended.
    for i in range(red_wine.shape[1]):
        red_wine[:, i] = normalize(red_wine[:, i])
    return red_wine