Exemple #1
0
def test_LOF_normalize_instances():
    """Check ``LOF.instances`` for two small inputs built with ``normalize=True``.

    Each case pairs the raw points handed to ``lof.LOF`` with the exact
    ``instances`` list the constructed object is expected to expose.
    """
    cases = (
        (((1, 1), (2, 2)), [(0.0, 0.0), (1.0, 1.0)]),
        (((1, 1), (2, 2), (3, 3)), [(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)]),
    )
    for raw_points, expected_instances in cases:
        model = lof.LOF(raw_points, normalize=True)
        assert model.instances == expected_instances
Exemple #2
0
 def test_get_lof_sorted_filtered_ascending_filtered_range(self):
     """Check ``get_lof_sorted_filtered(False, 0, 2)`` on the fixture coords.

     The call must yield exactly three entries; each listed position is
     compared against its expected ``(label, lof_value)`` tuple.
     """
     detector = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
     result = detector.get_lof_sorted_filtered(False, 0, 2)
     assert len(result) == 3
     # (position in result, expected entry) pairs.
     expected_at = (
         (0, ('a', 0.8749999999999999)),
         (2, ('b', 1.3333333333333333)),
         (1, ('c', 0.8749999999999999)),
     )
     for position, entry in expected_at:
         assert result[position] == entry
Exemple #3
0
 def test_get_all_lof(self):
     """Check that ``get_all_lof`` returns the four expected entries in order.

     The model is built from ``self.coords`` with the Manhattan constant
     and a third constructor argument of 2.
     """
     detector = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
     result = detector.get_all_lof()
     expected = (
         ('a', 0.8749999999999999),
         ('b', 1.3333333333333333),
         ('c', 0.8749999999999999),
         ('d', 2.0),
     )
     assert len(result) == 4
     for position, entry in enumerate(expected):
         assert result[position] == entry
Exemple #4
0
 def test_init_with_coords_input_from_csv_file_name(self):
     """Build the model from the file name ``'test.csv'`` and check its LOFs.

     The four expected entries carry generated ``coord_<i>_x_<x>_y_<y>``
     labels and are compared position by position.
     """
     detector = lof.LOF('test.csv', lof.LOF.CONST_MANHATTAN, 2)
     result = detector.get_all_lof()
     expected = (
         ('coord_0_x_0_y_0', 0.8749999999999999),
         ('coord_1_x_0_y_1', 1.3333333333333333),
         ('coord_2_x_1_y_1', 0.8749999999999999),
         ('coord_3_x_3_y_0', 2.0),
     )
     assert len(result) == 4
     for position, entry in enumerate(expected):
         assert result[position] == entry
Exemple #5
0
 def test_get_lof_sorted_filtered_descending(self):
     """Check ``get_lof_sorted_filtered(True)`` on the fixture coords.

     The result is printed, must contain four entries, and each listed
     position is compared against its expected ``(label, lof_value)`` tuple.
     """
     detector = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
     result = detector.get_lof_sorted_filtered(True)
     print(result)
     assert len(result) == 4
     # (position in result, expected entry) pairs.
     expected_at = (
         (2, ('a', 0.8749999999999999)),
         (1, ('b', 1.3333333333333333)),
         (3, ('c', 0.8749999999999999)),
         (0, ('d', 2.0)),
     )
     for position, entry in expected_at:
         assert result[position] == entry
Exemple #6
0
    def test_init_with_coords_input_as_array_of_tuples(self):
        """Build the model from a list of ``(x, y)`` tuples and check its LOFs.

        The four expected entries carry generated ``coord_<i>_x_<x>_y_<y>``
        labels and are compared position by position.
        """
        points = [(0, 0), (0, 1), (1, 1), (3, 0)]
        detector = lof.LOF(points, lof.LOF.CONST_MANHATTAN, 2)
        result = detector.get_all_lof()
        expected = (
            ('coord_0_x_0_y_0', 0.8749999999999999),
            ('coord_1_x_0_y_1', 1.3333333333333333),
            ('coord_2_x_1_y_1', 0.8749999999999999),
            ('coord_3_x_3_y_0', 2.0),
        )
        assert len(result) == 4
        for position, entry in enumerate(expected):
            assert result[position] == entry
Exemple #7
0
    def test_init_with_coords_input_as_x_and_y_array(self):
        """Build the model from parallel x and y lists and check its LOFs.

        The input is ``[xs, ys]``; the four expected entries carry generated
        ``coord_<i>_x_<x>_y_<y>`` labels and are compared position by position.
        """
        xs = [0, 0, 1, 3]
        ys = [0, 1, 1, 0]
        detector = lof.LOF([xs, ys], lof.LOF.CONST_MANHATTAN, 2)
        result = detector.get_all_lof()
        expected = (
            ('coord_0_x_0_y_0', 0.8749999999999999),
            ('coord_1_x_0_y_1', 1.3333333333333333),
            ('coord_2_x_1_y_1', 0.8749999999999999),
            ('coord_3_x_3_y_0', 2.0),
        )
        assert len(result) == 4
        for position, entry in enumerate(expected):
            assert result[position] == entry
Exemple #8
0
    def test_get_unique_pairs(self):
        """Check that ``get_unique_pairs`` yields the six expected label pairs.

        Every pair produced by iterating the call is collected and printed,
        then the collected sequence is compared position by position.
        """
        detector = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
        # The original test invokes the method once and discards the result
        # before iterating a second call; that extra call is kept as-is.
        detector.get_unique_pairs()
        seen = []
        for label_pair in detector.get_unique_pairs():
            seen.append(label_pair)
            print(label_pair)

        expected = (
            ('a', 'b'),
            ('a', 'c'),
            ('a', 'd'),
            ('b', 'c'),
            ('b', 'd'),
            ('c', 'd'),
        )
        assert len(seen) == 6
        for position, label_pair in enumerate(expected):
            assert seen[position] == label_pair
Exemple #9
0
    def test_init_with_coords_input_as_ordered_dict(self):
        """Build the model from a nested ``OrderedDict`` and check its LOFs.

        The outer mapping is keyed by label; each value is an ``OrderedDict``
        with ``'x'`` and ``'y'`` entries. The expected entries use the outer
        keys as labels and are compared position by position.
        """
        labelled_points = (
            ('a', (0, 0)),
            ('b', (0, 1)),
            ('c', (1, 1)),
            ('d', (3, 0)),
        )
        coords = OrderedDict(
            (label, OrderedDict([('x', px), ('y', py)]))
            for label, (px, py) in labelled_points
        )

        detector = lof.LOF(coords, lof.LOF.CONST_MANHATTAN, 2)
        result = detector.get_all_lof()
        expected = (
            ('a', 0.8749999999999999),
            ('b', 1.3333333333333333),
            ('c', 0.8749999999999999),
            ('d', 2.0),
        )
        assert len(result) == 4
        for position, entry in enumerate(expected):
            assert result[position] == entry
Exemple #10
0
def test_LOF_normalize_instances():
    """Check ``LOF.instances`` for two NumPy inputs built with ``normalize=True``.

    Each case pairs the raw points (wrapped in ``np.array`` before being
    handed to ``lof.LOF``) with the values ``instances`` must be close to
    according to ``np.allclose``.
    """
    cases = (
        (((1, 1), (2, 2)), [(0.0, 0.0), (1.0, 1.0)]),
        (((1, 1), (2, 2), (3, 3)), [(0.0, 0.0), (0.5, 0.5), (1.0, 1.0)]),
    )
    for raw_points, expected_instances in cases:
        model = lof.LOF(np.array(raw_points), normalize=True)
        assert np.allclose(model.instances, np.array(expected_instances))
Exemple #11
0
def index():
    """Load the shuttle sample CSV, derive layout data and render the page.

    Steps, in order:

    1. Read ``./static/shuttle20.csv``: the first row becomes
       ``feature_name``; every later row is converted cell by cell and
       appended to ``data``.
    2. Run ``feature_explore.feature_analysis(data)``.
    3. Build ``lof.LOF(data, k_list)`` with ``k_list`` taken from
       ``lof.initialize_k_list(data)`` and pass its ``instances`` (as a
       NumPy array) to ``mymds.get_position(..., 2)``.
    4. Bundle the results, plus hard-coded ``category_mean`` and
       ``category_index`` values, into the dict handed to the template.

    Returns:
        The result of ``render_template('index.html', data=return_data)``.
    """
    data = []
    with open('./static/shuttle20.csv', newline='') as readfile:
        reader = csv.reader(readfile)
        # The first row holds the column names, not data. The default keeps
        # ``feature_name`` bound even when the file is empty.
        feature_name = next(reader, [])
        for row in reader:
            # NOTE(review): eval() executes arbitrary expressions found in
            # the CSV cells. Acceptable only while the file is trusted;
            # consider ast.literal_eval or float() instead.
            # empty data in csv will cause "unexpected EOF while parsing"
            data.append(list(map(eval, row)))
    print("data loaded!")

    # calculate feature distribution
    (feature_position, feature_position_min, feature_position_max,
     feature_variance, feature_mean) = feature_explore.feature_analysis(data)

    # initialize LOF class
    k_list = lof.initialize_k_list(data)
    model = lof.LOF(data, k_list)

    # calculate data position
    normalized_data_array = np.array(model.instances)
    (data_position, data_position_min,
     data_position_max) = mymds.get_position(normalized_data_array, 2)

    # Disabled rare-category (RC) computation, kept for reference; the
    # hard-coded ``category_mean`` below stands in for its result.
    #
    #   print("start to compute RC")
    #   rare_centers = lof.outliers(data, k_list)
    #   center = rare_centers[0]
    #   center_index = center["index"]
    #   center_kinf = center["k_inf"]
    #   # center point and its k-neighbours
    #   neighbours = lof.get_neighbours(center_kinf, data[center_index], data)
    #   # put together as RC
    #   category = copy.deepcopy(neighbours)
    #   category.append(data[center_index])
    #   category_mean = ca.category_mean_relative(category, feature_mean)
    category_mean = [
        -3.066666666666663, -0.8555555555555556, -0.8944444444444457,
        -0.1222222222222222, 3.811111111111117, 5.094444444444445,
        2.1611111111111114, -4.916666666666664, -7.033333333333333
    ]
    category_index = [14, 7, 19, 15, 16, 5, 2, 9, 13]
    print(category_mean)

    return_data = {
        "raw_data": data,
        "feature_position": feature_position,
        "feature_position_min": feature_position_min,
        "feature_position_max": feature_position_max,
        "feature_variance": feature_variance,
        "feature_name": feature_name,
        "data_position": data_position,
        "data_position_min": data_position_min,
        "data_position_max": data_position_max,
        "category_mean": category_mean,
        "category_index": category_index
    }

    ######### POST request #########
    # Disabled POST-only handling, kept for reference:
    #
    #   if request.method == "POST":
    #       print("This is post")
    #       list_add = []
    #       getdata = json.loads(request.get_data())["data"]
    #       print(getdata)

    # NOTE(review): with the method check disabled, the message and the
    # request-body dump below run on every call, whatever the HTTP method.
    print("This is post")
    recv_data = request.get_data()
    print(recv_data)

    return render_template('index.html', data=return_data)
import matplotlib.pyplot as plt
import lof


# Sample points: four around the origin, four around (8..9, 8..9), and a
# single point at (4, 0).
x = [0, 0, 1, 1, 4, 8, 9, 8, 9]
y = [0, 1, 1, 0, 0, 9, 8, 8, 9]
coords_as_x_and_y_array = [x, y]

# Build the model with the Manhattan constant and a third argument of 1,
# then print every "<label>: <value>" entry the filtered query returns.
test_lof = lof.LOF(coords_as_x_and_y_array, lof.LOF.CONST_MANHATTAN, 1)
lofs = test_lof.get_lof_sorted_filtered(True)
for l in lofs:
    print(f"{l[0]!s}: {l[1]!s}")

# Scatter plot of the raw points.
plt.scatter(x, y)

plt.xlabel('x')
plt.ylabel('y')
plt.title('K Value Too Small')
plt.grid(True)
# plt.savefig("test.png")
plt.show()
Exemple #13
0
 def test_get_lof_sorted_filtered_descending_filtered(self):
     """Check ``get_lof_sorted_filtered(True, 1, 2)`` on the fixture coords.

     The call must yield a single entry, the one labelled ``'b'``.
     """
     detector = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
     result = detector.get_lof_sorted_filtered(True, 1, 2)
     expected_only_entry = ('b', 1.3333333333333333)
     assert len(result) == 1
     assert result[0] == expected_only_entry
Exemple #14
0
 def test_print_all_data(self):
     """Smoke test: ``print_all_data`` runs on the fixture coords."""
     lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2).print_all_data()
Exemple #15
0
 def test(self):
     """Smoke test: ``print_all_lof`` runs on the fixture coords."""
     lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2).print_all_lof()
Exemple #16
0
            # empty data in csv will cause "unexpected EOF while parsing"
# NOTE(review): this is a fragment of a larger script; ``data`` is populated
# by code that is not visible here.
# print(data)
print("data loaded!")

# calculate feature distribution
(feature_position, feature_position_min, feature_position_max,
 feature_variance, feature_mean) = feature_explore.feature_analysis(data)
# print(feature_position)
# print(feature_position_max)
print(feature_mean)
# calculate rare category
# NOTE(review): the empty-list assignment is immediately overwritten by the
# next line.
k_list = []
k_list = lof.initialize_k_list(data)
benchmark = 1  # the gap between k and RC's size

# Build the LOF model from the loaded rows and the per-row k list.
l = lof.LOF(data, k_list)

# Hand the model's instances to the MDS helper as a NumPy array; the second
# argument is 2 (presumably the number of output dimensions -- confirm
# against mymds.get_position).
normalized_data_array = np.array(l.instances)
(
    data_position,
    data_position_min,
    data_position_max,
) = mymds.get_position(normalized_data_array, 2)
# print(data_position)

print("start to compute RC")

rare_centers = lof.outliers(data, k_list)
# print(rare_centers)

# Only the first entry of the outlier list is used from here on.
center = rare_centers[0]
Exemple #17
0
 def test_print_all_lof(self):
     """Call ``print_all_lof`` on the fixture coords and print what it returns."""
     detector = lof.LOF(self.coords, lof.LOF.CONST_MANHATTAN, 2)
     returned = detector.print_all_lof()
     print(returned)