def instance_avgsk():
    """Print average skyline sizes while varying the instance count (3..10).

    For each instance count the matching
    ``10000_dim2_pos<count>_rad5_01000.csv`` file is loaded with
    ``batchImport`` and its first 10000 items are fed, one at a time, to a
    ``slideBPSky`` object (printed as "Brute force") and then to a
    ``slideUPSky`` object (printed as "Update"), both built with
    ``wsize=300``.  After every item the lengths of ``getSkyline()`` and
    ``getSkyline2()`` are accumulated; the two averages are printed.
    """

    def _report(sky, stream):
        # Feed 10000 items to `sky`, summing both skyline lengths per step.
        total1 = total2 = 0
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1 = total1 / 10000
        mean2 = total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print('=== Test how instance count affect candidate skyline ===')
    for count in range(3, 11):
        dqueue = batchImport(
            '10000_dim2_pos' + str(count) + '_rad5_01000.csv', count)
        print('========== instance count = ' + str(count) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, count, 5, [0, 1000], wsize=300)
        _report(brute, dqueue)

        print('---------- Update ----------')
        updater = slideUPSky(2, count, 5, [0, 1000], wsize=300)
        _report(updater, dqueue)
def radius_avgsk():
    """Print average skyline sizes while varying the data radius (3..10).

    For each radius the matching ``10000_dim2_pos5_rad<r>_01000.csv`` file
    is loaded with ``batchImport`` and its first 10000 items are fed, one at
    a time, to a ``slideBPSky`` object (printed as "Brute force") and then
    to a ``slideUPSky`` object (printed as "Update"), both built with the
    same radius and ``wsize=300``.  After every item the lengths of
    ``getSkyline()`` and ``getSkyline2()`` are accumulated; the two averages
    are printed.
    """

    def _report(sky, stream):
        # Feed 10000 items to `sky`, summing both skyline lengths per step.
        total1 = total2 = 0
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1 = total1 / 10000
        mean2 = total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print('=== Test how data radius affect candidate skyline ===')
    for rad in range(3, 11):
        dqueue = batchImport(
            '10000_dim2_pos5_rad' + str(rad) + '_01000.csv', 5)
        print('========== radius = ' + str(rad) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, 5, rad, [0, 1000], wsize=300)
        _report(brute, dqueue)

        print('---------- Update ----------')
        updater = slideUPSky(2, 5, rad, [0, 1000], wsize=300)
        _report(updater, dqueue)
def dim_avgsk():
    """Print average skyline sizes while varying the data dimension (2..10).

    For each dimension the matching ``10000_dim<d>_pos5_rad5_01000.csv``
    file is loaded with ``batchImport`` and its first 10000 items are fed,
    one at a time, to a ``slideBPSky`` object (printed as "Brute force") and
    then to a ``slideUPSky`` object (printed as "Update"), both built with
    that dimension and ``wsize=300``.  After every item the lengths of
    ``getSkyline()`` and ``getSkyline2()`` are accumulated; the two averages
    are printed.
    """

    def _report(sky, stream):
        # Feed 10000 items to `sky`, summing both skyline lengths per step.
        total1 = total2 = 0
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1 = total1 / 10000
        mean2 = total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print("=== Test how dimension of data affect candidate size ===")
    for ndim in range(2, 11):
        dqueue = batchImport(
            '10000_dim' + str(ndim) + '_pos5_rad5_01000.csv', 5)
        print('========== Data dimension = ' + str(ndim) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(ndim, 5, 5, [0, 1000], wsize=300)
        _report(brute, dqueue)

        print('---------- Update ----------')
        updater = slideUPSky(ndim, 5, 5, [0, 1000], wsize=300)
        _report(updater, dqueue)
def wsize_avgsk():
    """Print average skyline sizes while varying ``wsize`` (100..1000).

    ``10000_dim2_pos5_rad5_01000.csv`` is loaded once with ``batchImport``.
    For each window size its first 10000 items are fed, one at a time, to a
    ``slideBPSky`` object (printed as "Brute force") and then to a
    ``slideUPSky`` object (printed as "Update"), both built with that
    ``wsize``.  After every item the lengths of ``getSkyline()`` and
    ``getSkyline2()`` are accumulated; the two averages are printed.
    """

    def _report(sky, stream):
        # Feed 10000 items to `sky`, summing both skyline lengths per step.
        total1 = total2 = 0
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
            total1 += len(sky.getSkyline())
            total2 += len(sky.getSkyline2())
        sky.removeRtree()
        mean1 = total1 / 10000
        mean2 = total2 / 10000
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

    print("=== Test how window size affect candidate skyline ===")
    # The same dataset is reused for every window size.
    dqueue = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    for window in range(100, 1001, 100):
        print('========== window size = ' + str(window) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, 5, 5, [0, 1000], wsize=window)
        _report(brute, dqueue)

        print('---------- Update ----------')
        updater = slideUPSky(2, 5, 5, [0, 1000], wsize=window)
        _report(updater, dqueue)
def test_batchImport(self):
    """Check the result of importing ``test_30_dim3_pos3_rad2_0100.csv``.

    Verifies that 30 records come back, that the first record's
    probabilities at indices 0..2 sum to (almost) 1, and that
    ``getLocation(4)`` on the third record yields an empty list.
    """
    records = batchImport('test_30_dim3_pos3_rad2_0100.csv', 3)
    self.assertEqual(len(records), 30)

    first = records[0]
    prob_total = first.getProb(0) + first.getProb(1) + first.getProb(2)
    self.assertAlmostEqual(prob_total, 1)

    # Index 4 is presumably beyond the 3 imported instances -- verify
    # against batchImport/Data if this expectation ever changes.
    third = records[2]
    self.assertEqual(third.getLocation(4), [])
def radius_time():
    """Print the running time of both skyline variants for radii 3..10.

    For each radius the matching ``10000_dim2_pos5_rad<r>_01000.csv`` file
    is loaded with ``batchImport``; its first 10000 items are then fed to a
    ``slideBPSky`` object (printed as "Brute force") and to a ``slideUPSky``
    object (printed as "Update"), both built with that radius and
    ``wsize=300``.  The elapsed time of each run, including the final
    ``removeRtree()`` call but excluding object construction and data
    loading, is printed.
    """

    def _timed_run(sky, stream):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print('=== Test how data radius affect running time ===')
    for r in range(3, 11):
        dqueue = batchImport('10000_dim2_pos5_rad' + str(r) + '_01000.csv', 5)
        print('========== radius = ' + str(r) + ' ==========\n')

        print('---------- Brute force ----------')
        tbsky = slideBPSky(2, 5, r, [0, 1000], wsize=300)
        _timed_run(tbsky, dqueue)

        print('---------- Update ----------')
        tusky = slideUPSky(2, 5, r, [0, 1000], wsize=300)
        _timed_run(tusky, dqueue)
def wsize_time():
    """Print the running time of both skyline variants for ``wsize`` 100..1000.

    ``10000_dim2_pos5_rad5_01000.csv`` is loaded once with ``batchImport``.
    For each window size its first 10000 items are fed to a ``slideBPSky``
    object (printed as "Brute force") and to a ``slideUPSky`` object
    (printed as "Update"), both built with that ``wsize``.  The elapsed time
    of each run, including the final ``removeRtree()`` call but excluding
    object construction, is printed.
    """

    def _timed_run(sky, stream):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print("=== Test how window size affect running time ===")
    # The same dataset is reused for every window size.
    dqueue = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    for w in range(100, 1001, 100):
        print('========== window size = ' + str(w) + ' ==========')

        print('---------- Brute force ----------')
        tbsky = slideBPSky(2, 5, 5, [0, 1000], wsize=w)
        _timed_run(tbsky, dqueue)

        print('---------- Update ----------')
        tusky = slideUPSky(2, 5, 5, [0, 1000], wsize=w)
        _timed_run(tusky, dqueue)
def dim_time():
    """Print the running time of both skyline variants for dimensions 2..10.

    For each dimension the matching ``10000_dim<d>_pos5_rad5_01000.csv``
    file is loaded with ``batchImport``; its first 10000 items are then fed
    to a ``slideBPSky`` object (printed as "Brute force") and to a
    ``slideUPSky`` object (printed as "Update"), both built with that
    dimension and ``wsize=300``.  The elapsed time of each run, including
    the final ``removeRtree()`` call but excluding object construction and
    data loading, is printed.
    """

    def _timed_run(sky, stream):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print("=== Test how dimension of data affect running time ===")
    for d in range(2, 11):
        dqueue = batchImport('10000_dim' + str(d) + '_pos5_rad5_01000.csv', 5)
        print('========== Data dimension = ' + str(d) + ' ==========')

        print('---------- Brute force ----------')
        tbsky = slideBPSky(d, 5, 5, [0, 1000], wsize=300)
        _timed_run(tbsky, dqueue)

        print('---------- Update ----------')
        tusky = slideUPSky(d, 5, 5, [0, 1000], wsize=300)
        _timed_run(tusky, dqueue)
def instance_time():
    """Print the running time of both skyline variants for 3..10 instances.

    For each instance count the matching
    ``10000_dim2_pos<count>_rad5_01000.csv`` file is loaded with
    ``batchImport``; its first 10000 items are then fed to a ``slideBPSky``
    object (printed as "Brute force") and to a ``slideUPSky`` object
    (printed as "Update"), both built with that count and ``wsize=300``.
    The elapsed time of each run, including the final ``removeRtree()`` call
    but excluding object construction and data loading, is printed.
    """

    def _timed_run(sky, stream):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(10000):
            sky.receiveData(stream[pos])
            sky.updateSkyline()
        sky.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print('=== Test how instance count affect running time ===')
    for ins in range(3, 11):
        dqueue = batchImport(
            '10000_dim2_pos' + str(ins) + '_rad5_01000.csv', ins)
        print('========== instance count = ' + str(ins) + ' ==========')

        print('---------- Brute force ----------')
        tbsky = slideBPSky(2, ins, 5, [0, 1000], wsize=300)
        _timed_run(tbsky, dqueue)

        print('---------- Update ----------')
        tusky = slideUPSky(2, ins, 5, [0, 1000], wsize=300)
        _timed_run(tusky, dqueue)
skyline2.append(p) # prune objects in sk2 for d in skyline2.copy(): if d in skyline2: vurstart = [ self.drange[1] if i+2*self.radius+0.1 > self.drange[1] else i+2*self.radius+0.1 for i in d.getLocationMax()] vurend = [ self.drange[1] for i in range(self.dim)] vur = [ p.object for p in (self.index.intersection(tuple(vurstart+vurend),objects=True))] for p in vur: if p in skyline2: skyline2.remove(p) self.skyline = skyline self.skyline2 = skyline2 if __name__ == '__main__': test = slideUPSky(5, 5, 5, [0,1000], wsize=24) dqueue = batchImport('data.csv', 5) # prevsk1 = [] # prevsk2 = [] # with open('result.txt', 'a') as f: # f.write("o_Delete,o_SK1,o_SK2,a_Delete,a_SK1,a_SK2\n") start_time = time.time() for i in range(827): test.receiveData(dqueue[i]) # out = test.getOutdated().copy() test.updateSkyline() # usk1 = list(set(test.getSkyline())-set(prevsk1)) # usk2 = list(set(test.getSkyline2())-set(prevsk2)) # orig = {'Delete':out,'SK1':test.getSkyline(),'SK2':test.getSkyline2()} # arch = {'Delete':out,'SK1':usk1,'SK2':usk2}
def loadData(self, file):
    """Import the csv file `file` and keep the result on ``self.data``.

    The file is read through ``batchImport`` using ``self.ps`` as its
    second argument.
    """
    imported = batchImport(file, self.ps)
    self.data = imported
sock.connect((self.host, self.port)) sock.sendall(sdata) # received = str(sock.recv(1024), "utf-8") print("Sent: {}".format(updateInfo)) # print("Received: {}".format(received)) if __name__ == "__main__": config = configparser.ConfigParser() config.read('edge.config') PORT = int(config['DEFAULT'].get('Port')) HOST = config['DEFAULT'].get('Host') edge = Edge(HOST, PORT) usky = slideUPSky(2, 5, 4, [0, 1000], wsize=10) dqueue = batchImport('1500_dim2_pos4_rad5_01000.csv', 4) for i in range(15): oldsk = usky.getSkyline().copy() oldsk2 = usky.getSkyline2().copy() usky.receiveData(dqueue[i]) out = usky.getOutdated().copy() usky.updateSkyline() usk1 = list(set(usky.getSkyline()) - set(oldsk)) usk2 = list(set(usky.getSkyline2()) - set(oldsk2)) result = {'Delete': out, 'SK1': usk1, 'SK2': usk2} # edge.sendSK(usky.getSkyline(),usky.getSkyline2()) edge.send(result) print('SK1') for d in usky.getSkyline(): print(d.getLabel())
fig = plt.figure() if dim == 2: for d in datalist: x = [d.getLocation(i)[0] for i in range(pcount)] y = [d.getLocation(j)[1] for j in range(pcount)] plt.scatter(x, y, alpha=0.5) plt.xlim(drange[0], drange[1]) plt.ylim(drange[0], drange[1]) elif dim == 3: ax = fig.add_subplot(111, projection='3d') for d in datalist: x = [d.getLocation(i)[0] for i in range(pcount)] y = [d.getLocation(j)[1] for j in range(pcount)] z = [d.getLocation(k)[2] for k in range(pcount)] ax.scatter(x, y, z, alpha=0.5) ax.set_xlim(drange[0], drange[1]) ax.set_ylim(drange[0], drange[1]) ax.set_zlim(drange[0], drange[1]) plt.show() if __name__ == '__main__': csv = 'data.csv' pcount = 5 answer = input('Would you like to change the csv file? (Y/N)') if answer == 'Y': csv = input('Please specify the csv file name in data folder: ') pcount = int(input('Please input the probability count: ')) datalist = batchImport(csv, pcount) visualize(datalist, pcount, [0, 1000])
# Stream every odd-indexed record of the 10000-item dataset through a
# slideUPSky object and pickle, per record, what changed: the outdated
# items and the entries newly present in each of the two skylines.
import os, sys
sys.path.append(os.path.abspath(os.pardir))
import pickle
import time
from data.dataClass import Data, batchImport
from skyline.slideUPSky import slideUPSky
from visualize import visualize

if __name__ == "__main__":
    eid = input("Edge id: ")
    usky = slideUPSky(2, 5, 5, [0, 1000], wsize=300)
    dqueue = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    # Odd indices only: 1, 3, ..., 9999.
    idx = range(1, 10000, 2)
    with open('pickle_edge' + eid + '.pickle', 'wb') as f:
        # perf_counter() is monotonic, so the reported duration is not
        # affected by wall-clock adjustments during the run.
        start_time = time.perf_counter()
        for i in idx:
            # Snapshot both skylines before the new record arrives so the
            # additions can be computed afterwards.
            oldsk = usky.getSkyline().copy()
            oldsk2 = usky.getSkyline2().copy()
            usky.receiveData(dqueue[i])
            out = usky.getOutdated().copy()
            usky.updateSkyline()
            usk1 = list(set(usky.getSkyline()) - set(oldsk))
            usk2 = list(set(usky.getSkyline2()) - set(oldsk2))
            result = {'Delete': out, 'SK1': usk1, 'SK2': usk2}
            pickle.dump(result, f)
        print("--- %s seconds ---" % (time.perf_counter() - start_time))
l1eql2 = 0 l2doml1 = 0 for i in range(axis): if loc1[i] < loc2[i]: l1doml2 += 1 elif loc1[i] > loc2[i]: l2doml1 += 1 else: l1eql2 += 1 if l1doml2 == 0 and l2doml1 != 0: return False elif l2doml1 == 0 and l1doml2 != 0: return True else: return None if __name__ == '__main__': data = batchImport('test_30_dim2_pos3_rad2_0100.csv', 3) lbl = str(input('input the lable of data point: ')) index = -1 data1 = Data('tmp',2) for i,d in enumerate(data): if d.getLabel() == lbl: data1 = d index = i break del data[index] for d in data: print('Probability that '+ lbl + ' dominates ' + d.getLabel() + ' is: ' + str(dominateProbability(data1, d)))
# Sliding window update PSky
from skyline.PSky import PSky
from data.dataClass import Data, batchImport
from visualize.visualize import visualize
import random
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from data.dataClass import Data, batchImport
from skyline.slideUPSky import slideUPSky

test = slideUPSky(2, 5, 4, [0,100], wsize=10)
data = batchImport('20_dim2_pos5_rad5_0100.csv', 5)

fig = plt.figure()
ax = fig.add_subplot(111)
for d in data:
    # Scatter the first two coordinates of locations 0..4 of this object.
    x = []
    y = []
    for k in range(5):
        x.append(d.getLocation(k)[0])
    for k in range(5):
        y.append(d.getLocation(k)[1])
    ax.scatter(x, y, alpha=0.5)

    # Outline the box spanned by getLocationMin() and getLocationMax().
    lower = d.getLocationMin()
    upper = d.getLocationMax()
    box = Rectangle(
        xy=(lower[0], lower[1]),
        width=upper[0] - lower[0],
        height=upper[1] - lower[1],
        linewidth=1,
        fill=False,
    )
    ax.add_patch(box)
plt.show()
# for i in range(100):
#     test.receiveData(dqueue[i])
#     test.updateSkyline()