コード例 #1
0
def instance_avgsk():
    """Print the average skyline sizes for several instance counts.

    For each instance count from 3 to 10 the matching csv file is loaded
    with ``batchImport`` and its first 10000 records are streamed through
    ``slideBPSky`` (printed as "Brute force") and then ``slideUPSky``
    (printed as "Update"). After every record the lengths of
    ``getSkyline()`` and ``getSkyline2()`` are accumulated; their means
    over the 10000 steps are printed for each implementation.
    """
    def report_averages(sky, records):
        """Feed 10000 records to ``sky`` and print both mean skyline sizes."""
        total1, total2 = 0, 0
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1, mean2 = total1 / 10000, total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print('=== Test how instance count affect candidate skyline ===')
    for ins in range(3, 11):
        # The instance count selects the data file and is passed on to
        # both the importer and the skyline constructors.
        source = '10000_dim2_pos' + str(ins) + '_rad5_01000.csv'
        dqueue = batchImport(source, ins)
        print('========== instance count = ' + str(ins) + ' ==========')
        print('---------- Brute force ----------')
        report_averages(slideBPSky(2, ins, 5, [0, 1000], wsize=300), dqueue)
        print('---------- Update ----------')
        report_averages(slideUPSky(2, ins, 5, [0, 1000], wsize=300), dqueue)
コード例 #2
0
def radius_avgsk():
    """Print the average skyline sizes for several data radii.

    For each radius from 3 to 10 the matching csv file is loaded with
    ``batchImport`` and its first 10000 records are streamed through
    ``slideBPSky`` (printed as "Brute force") and then ``slideUPSky``
    (printed as "Update"). The mean lengths of ``getSkyline()`` and
    ``getSkyline2()`` over the 10000 steps are printed for each.
    """
    def report_averages(sky, records):
        """Feed 10000 records to ``sky`` and print both mean skyline sizes."""
        total1, total2 = 0, 0
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1, mean2 = total1 / 10000, total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print('=== Test how data radius affect candidate skyline ===')
    for r in range(3, 11):
        # The radius selects the data file and is the third constructor
        # argument of both skyline implementations.
        source = '10000_dim2_pos5_rad' + str(r) + '_01000.csv'
        dqueue = batchImport(source, 5)
        print('========== radius = ' + str(r) + ' ==========')
        print('---------- Brute force ----------')
        report_averages(slideBPSky(2, 5, r, [0, 1000], wsize=300), dqueue)
        print('---------- Update ----------')
        report_averages(slideUPSky(2, 5, r, [0, 1000], wsize=300), dqueue)
コード例 #3
0
def dim_avgsk():
    """Print the average skyline sizes for several data dimensions.

    For each dimension from 2 to 10 the matching csv file is loaded with
    ``batchImport`` and its first 10000 records are streamed through
    ``slideBPSky`` (printed as "Brute force") and then ``slideUPSky``
    (printed as "Update"). The mean lengths of ``getSkyline()`` and
    ``getSkyline2()`` over the 10000 steps are printed for each.
    """
    def report_averages(sky, records):
        """Feed 10000 records to ``sky`` and print both mean skyline sizes."""
        total1, total2 = 0, 0
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1, mean2 = total1 / 10000, total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print("=== Test how dimension of data affect candidate size ===")
    for d in range(2, 11):
        # The dimension selects the data file and is the first constructor
        # argument of both skyline implementations.
        source = '10000_dim' + str(d) + '_pos5_rad5_01000.csv'
        dqueue = batchImport(source, 5)
        print('========== Data dimension = ' + str(d) + ' ==========')
        print('---------- Brute force ----------')
        report_averages(slideBPSky(d, 5, 5, [0, 1000], wsize=300), dqueue)
        print('---------- Update ----------')
        report_averages(slideUPSky(d, 5, 5, [0, 1000], wsize=300), dqueue)
コード例 #4
0
def wsize_avgsk():
    """Print the average skyline sizes for several window sizes.

    One data set is loaded once with ``batchImport``. For each window size
    from 100 to 1000 (step 100) its first 10000 records are streamed
    through ``slideBPSky`` (printed as "Brute force") and then
    ``slideUPSky`` (printed as "Update"). The mean lengths of
    ``getSkyline()`` and ``getSkyline2()`` over the 10000 steps are printed
    for each.
    """
    def report_averages(sky, records):
        """Feed 10000 records to ``sky`` and print both mean skyline sizes."""
        total1, total2 = 0, 0
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1, mean2 = total1 / 10000, total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print("=== Test how window size affect candidate skyline ===")
    # The same records are reused for every window size.
    dqueue = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    for w in range(100, 1001, 100):
        print('========== window size = ' + str(w) + ' ==========')
        print('---------- Brute force ----------')
        report_averages(slideBPSky(2, 5, 5, [0, 1000], wsize=w), dqueue)
        print('---------- Update ----------')
        report_averages(slideUPSky(2, 5, 5, [0, 1000], wsize=w), dqueue)
コード例 #5
0
 def test_batchImport(self):
     """Check what ``batchImport`` returns for the 30-record test file."""
     records = batchImport('test_30_dim3_pos3_rad2_0100.csv', 3)
     self.assertEqual(len(records), 30)
     # The probabilities at indexes 0, 1 and 2 of the first record must
     # add up to 1 (within assertAlmostEqual's tolerance).
     first = records[0]
     total = sum(first.getProb(k) for k in range(3))
     self.assertAlmostEqual(total, 1)
     # Index 4 is expected to give an empty list; presumably because only
     # 3 instances were imported per record -- confirm against Data.
     self.assertEqual(records[2].getLocation(4), [])
コード例 #6
0
def radius_time():
    """Print the running time of both skyline variants for several radii.

    For each radius from 3 to 10 the matching csv file is loaded with
    ``batchImport`` and its first 10000 records are streamed through
    ``slideBPSky`` (printed as "Brute force") and then ``slideUPSky``
    (printed as "Update"). The elapsed time of each run, including the
    final ``removeRtree()`` call but not the constructor, is printed in
    seconds.

    Elapsed time is taken from ``time.perf_counter()``: unlike the wall
    clock returned by ``time.time()`` it is monotonic, so system clock
    adjustments during a run cannot distort the measurement.
    """
    def timed_run(sky, records):
        """Feed 10000 records to ``sky`` and print the elapsed seconds."""
        start = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - start))

    print('=== Test how data radius affect running time ===')
    for r in [3, 4, 5, 6, 7, 8, 9, 10]:
        dqueue = batchImport('10000_dim2_pos5_rad' + str(r) + '_01000.csv', 5)
        print('========== radius = ' + str(r) + ' ==========\n')
        print('---------- Brute force ----------')
        # The skyline object is built as the argument, i.e. before timing
        # starts inside timed_run.
        timed_run(slideBPSky(2, 5, r, [0, 1000], wsize=300), dqueue)
        print('---------- Update ----------')
        timed_run(slideUPSky(2, 5, r, [0, 1000], wsize=300), dqueue)
コード例 #7
0
def wsize_time():
    """Print the running time of both skyline variants per window size.

    One data set is loaded once with ``batchImport``. For each window size
    from 100 to 1000 (step 100) its first 10000 records are streamed
    through ``slideBPSky`` (printed as "Brute force") and then
    ``slideUPSky`` (printed as "Update"). The elapsed time of each run,
    including the final ``removeRtree()`` call but not the constructor, is
    printed in seconds.

    Elapsed time is taken from ``time.perf_counter()``: unlike the wall
    clock returned by ``time.time()`` it is monotonic, so system clock
    adjustments during a run cannot distort the measurement.
    """
    def timed_run(sky, records):
        """Feed 10000 records to ``sky`` and print the elapsed seconds."""
        start = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - start))

    print("=== Test how window size affect running time ===")
    # The same records are reused for every window size.
    dqueue = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    for w in [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]:
        print('========== window size = ' + str(w) + ' ==========')
        print('---------- Brute force ----------')
        # The skyline object is built as the argument, i.e. before timing
        # starts inside timed_run.
        timed_run(slideBPSky(2, 5, 5, [0, 1000], wsize=w), dqueue)
        print('---------- Update ----------')
        timed_run(slideUPSky(2, 5, 5, [0, 1000], wsize=w), dqueue)
コード例 #8
0
def dim_time():
    """Print the running time of both skyline variants per data dimension.

    For each dimension from 2 to 10 the matching csv file is loaded with
    ``batchImport`` and its first 10000 records are streamed through
    ``slideBPSky`` (printed as "Brute force") and then ``slideUPSky``
    (printed as "Update"). The elapsed time of each run, including the
    final ``removeRtree()`` call but not the constructor, is printed in
    seconds.

    Elapsed time is taken from ``time.perf_counter()``: unlike the wall
    clock returned by ``time.time()`` it is monotonic, so system clock
    adjustments during a run cannot distort the measurement.
    """
    def timed_run(sky, records):
        """Feed 10000 records to ``sky`` and print the elapsed seconds."""
        start = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - start))

    print("=== Test how dimension of data affect running time ===")
    for d in [2, 3, 4, 5, 6, 7, 8, 9, 10]:
        dqueue = batchImport('10000_dim'+str(d)+'_pos5_rad5_01000.csv', 5)
        print('========== Data dimension = '+ str(d) + ' ==========')
        print('---------- Brute force ----------')
        # The skyline object is built as the argument, i.e. before timing
        # starts inside timed_run.
        timed_run(slideBPSky(d, 5, 5, [0, 1000], wsize=300), dqueue)
        print('---------- Update ----------')
        timed_run(slideUPSky(d, 5, 5, [0, 1000], wsize=300), dqueue)
コード例 #9
0
def instance_time():
    """Print the running time of both skyline variants per instance count.

    For each instance count from 3 to 10 the matching csv file is loaded
    with ``batchImport`` and its first 10000 records are streamed through
    ``slideBPSky`` (printed as "Brute force") and then ``slideUPSky``
    (printed as "Update"). The elapsed time of each run, including the
    final ``removeRtree()`` call but not the constructor, is printed in
    seconds.

    Elapsed time is taken from ``time.perf_counter()``: unlike the wall
    clock returned by ``time.time()`` it is monotonic, so system clock
    adjustments during a run cannot distort the measurement.
    """
    def timed_run(sky, records):
        """Feed 10000 records to ``sky`` and print the elapsed seconds."""
        start = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(records[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - start))

    print('=== Test how instance count affect running time ===')
    for ins in [3, 4, 5, 6, 7, 8, 9, 10]:
        dqueue = batchImport('10000_dim2_pos' + str(ins) + '_rad5_01000.csv',
                             ins)
        print('========== instance count = ' + str(ins) + ' ==========')
        print('---------- Brute force ----------')
        # The skyline object is built as the argument, i.e. before timing
        # starts inside timed_run.
        timed_run(slideBPSky(2, ins, 5, [0, 1000], wsize=300), dqueue)
        print('---------- Update ----------')
        timed_run(slideUPSky(2, ins, 5, [0, 1000], wsize=300), dqueue)
コード例 #10
0
                        skyline2.append(p)
        # prune objects in sk2
        for d in skyline2.copy():
            if d in skyline2:
                vurstart = [ self.drange[1] if i+2*self.radius+0.1 > self.drange[1] else i+2*self.radius+0.1 for i in d.getLocationMax()]
                vurend = [ self.drange[1] for i in range(self.dim)]
                vur = [ p.object for p in (self.index.intersection(tuple(vurstart+vurend),objects=True))]
                for p in vur:
                    if p in skyline2:
                       skyline2.remove(p)
        self.skyline = skyline
        self.skyline2 = skyline2

if __name__ == '__main__':
    # Manual run: stream records from data.csv through an update-based
    # sliding-window skyline with a window of 24.
    test = slideUPSky(5, 5, 5, [0,1000], wsize=24)
    dqueue = batchImport('data.csv', 5)
    
    # Disabled: previous skylines, kept to compute per-step differences.
    # prevsk1 = []
    # prevsk2 = []
    
    # Disabled: header line for a csv-style result log.
    # with open('result.txt', 'a') as f:
    #     f.write("o_Delete,o_SK1,o_SK2,a_Delete,a_SK1,a_SK2\n")
    # NOTE(review): start_time is not read in the visible part of this
    # script; presumably it is printed further down -- confirm.
    start_time = time.time()
    # Feed the first 827 records one at a time, updating the skyline after
    # each of them.
    for i in range(827):
        test.receiveData(dqueue[i])
        # out = test.getOutdated().copy()
        test.updateSkyline()
        # Disabled: compare the full result ('orig') with the incremental
        # one ('arch') that holds only newly added skyline members.
        # usk1 = list(set(test.getSkyline())-set(prevsk1))
        # usk2 = list(set(test.getSkyline2())-set(prevsk2))
        # orig = {'Delete':out,'SK1':test.getSkyline(),'SK2':test.getSkyline2()}
        # arch = {'Delete':out,'SK1':usk1,'SK2':usk2}
コード例 #11
0
 def loadData(self, file):
     """
     Read the records of a csv file into ``self.data``.

     The file is parsed by ``batchImport``, which receives ``self.ps`` as
     its second argument.
     """
     records = batchImport(file, self.ps)
     self.data = records
コード例 #12
0
ファイル: edge.py プロジェクト: 0penth3wind0w/UEdgeSkyline
            sock.connect((self.host, self.port))
            sock.sendall(sdata)
        # received = str(sock.recv(1024), "utf-8")
        print("Sent:     {}".format(updateInfo))
        # print("Received: {}".format(received))


if __name__ == "__main__":
    # Read the target host and port from the DEFAULT section of edge.config.
    config = configparser.ConfigParser()
    config.read('edge.config')
    PORT = int(config['DEFAULT'].get('Port'))
    HOST = config['DEFAULT'].get('Host')

    edge = Edge(HOST, PORT)
    # NOTE(review): the data file name says pos4/rad5 and batchImport gets 4,
    # while the skyline is built with (2, 5, 4); other callers pass the
    # instance count second and the radius third -- confirm these arguments.
    usky = slideUPSky(2, 5, 4, [0, 1000], wsize=10)
    dqueue = batchImport('1500_dim2_pos4_rad5_01000.csv', 4)

    # Process the first 15 records, sending one incremental update each.
    for i in range(15):
        # Snapshot both skylines before the new record arrives.
        oldsk = usky.getSkyline().copy()
        oldsk2 = usky.getSkyline2().copy()
        usky.receiveData(dqueue[i])
        # Copy the outdated list before updateSkyline() runs.
        out = usky.getOutdated().copy()
        usky.updateSkyline()
        # Members present now but not in the snapshot, i.e. new entries.
        usk1 = list(set(usky.getSkyline()) - set(oldsk))
        usk2 = list(set(usky.getSkyline2()) - set(oldsk2))
        result = {'Delete': out, 'SK1': usk1, 'SK2': usk2}
        # edge.sendSK(usky.getSkyline(),usky.getSkyline2())
        edge.send(result)
        # Print the labels of the complete current skyline for inspection.
        print('SK1')
        for d in usky.getSkyline():
            print(d.getLabel())
コード例 #13
0
        fig = plt.figure()
        if dim == 2:
            for d in datalist:
                x = [d.getLocation(i)[0] for i in range(pcount)]
                y = [d.getLocation(j)[1] for j in range(pcount)]
                plt.scatter(x, y, alpha=0.5)
                plt.xlim(drange[0], drange[1])
                plt.ylim(drange[0], drange[1])

        elif dim == 3:
            ax = fig.add_subplot(111, projection='3d')
            for d in datalist:
                x = [d.getLocation(i)[0] for i in range(pcount)]
                y = [d.getLocation(j)[1] for j in range(pcount)]
                z = [d.getLocation(k)[2] for k in range(pcount)]
                ax.scatter(x, y, z, alpha=0.5)
                ax.set_xlim(drange[0], drange[1])
                ax.set_ylim(drange[0], drange[1])
                ax.set_zlim(drange[0], drange[1])
        plt.show()


if __name__ == '__main__':
    # Defaults, used unless the user answers exactly 'Y' below.
    csv, pcount = 'data.csv', 5
    answer = input('Would you like to change the csv file? (Y/N)')
    if answer == 'Y':
        # Ask for the replacement file and its probability count.
        csv = input('Please specify the csv file name in data folder: ')
        pcount = int(input('Please input the probability count: '))
    # Load the chosen file and plot it over the value range 0..1000.
    datalist = batchImport(csv, pcount)
    visualize(datalist, pcount, [0, 1000])
コード例 #14
0
import os, sys

sys.path.append(os.path.abspath(os.pardir))

import pickle
import time

from data.dataClass import Data, batchImport
from skyline.slideUPSky import slideUPSky
from visualize import visualize

if __name__ == "__main__":
    eid = input("Edge id: ")
    usky = slideUPSky(2, 5, 5, [0, 1000], wsize=300)
    dqueue = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)

    # Only the odd indexes among the first 10000 records are processed.
    idx = list(range(1, 10000, 2))
    with open('pickle_edge' + eid + '.pickle', 'wb') as f:
        start_time = time.time()
        for i in idx:
            # Snapshot both skylines before the new record arrives.
            oldsk, oldsk2 = usky.getSkyline().copy(), usky.getSkyline2().copy()
            usky.receiveData(dqueue[i])
            # Copy the outdated list before updateSkyline() runs.
            out = usky.getOutdated().copy()
            usky.updateSkyline()
            # One pickled record per step: the outdated entries plus the
            # members that are in each skyline now but not in its snapshot.
            result = {
                'Delete': out,
                'SK1': list(set(usky.getSkyline()) - set(oldsk)),
                'SK2': list(set(usky.getSkyline2()) - set(oldsk2)),
            }
            pickle.dump(result, f)
        elapsed = time.time() - start_time
        print("--- %s seconds ---" % elapsed)
コード例 #15
0
    l1eql2 = 0
    l2doml1 = 0
    for i in range(axis):
        if loc1[i] < loc2[i]:
            l1doml2 += 1
        elif loc1[i] > loc2[i]:
            l2doml1 += 1
        else:
            l1eql2 += 1
    if l1doml2 == 0 and l2doml1 != 0:
        return False
    elif l2doml1 == 0 and l1doml2 != 0:
        return True
    else:
        return None

if __name__ == '__main__':
    # Manual check: print the probability that one chosen record dominates
    # each of the other records in the test file.
    data = batchImport('test_30_dim2_pos3_rad2_0100.csv', 3)
    lbl = str(input('input the lable of data point: '))
    # Placeholder values that stay in effect when no record has the label.
    index = -1
    data1 = Data('tmp',2)
    for i,d in enumerate(data):
        if d.getLabel() == lbl:
            data1 = d
            index = i
            break
    # Drop the chosen record so it is not compared with itself. This must
    # only happen when it was found: with index still -1, `del data[index]`
    # would silently remove the last, unrelated record (or raise IndexError
    # on an empty list).
    if index != -1:
        del data[index]

    for d in data:
        print('Probability that '+ lbl + ' dominates ' + d.getLabel() + ' is: ' + str(dominateProbability(data1, d)))
コード例 #16
0
# Sliding window update PSky
from skyline.PSky import PSky
from data.dataClass import Data, batchImport
from visualize.visualize import visualize

import random

from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle

from data.dataClass import Data, batchImport
from skyline.slideUPSky import slideUPSky

# NOTE(review): `test` is only used by the commented-out loop at the end of
# this script.
test = slideUPSky(2, 5, 4, [0,100], wsize=10)
data = batchImport('20_dim2_pos5_rad5_0100.csv', 5)

fig = plt.figure()
ax = fig.add_subplot(111)

for d in data:
    # Scatter the locations at indexes 0..4 of this record.
    locations = [d.getLocation(k) for k in range(5)]
    x = [loc[0] for loc in locations]
    y = [loc[1] for loc in locations]
    ax.scatter(x, y, alpha=0.5)
    # Outline the rectangle spanned by the record's min and max locations.
    lower, upper = d.getLocationMin(), d.getLocationMax()
    outline = Rectangle(
        xy=(lower[0], lower[1]),
        width=upper[0] - lower[0],
        height=upper[1] - lower[1],
        linewidth=1,
        fill=False,
    )
    ax.add_patch(outline)

plt.show()

# for i in range(100):
#     test.receiveData(dqueue[i])
#     test.updateSkyline()