def instance_avgsk():
    """Print average skyline sizes for instance counts 3 through 10.

    For every instance count the matching CSV is loaded with
    ``batchImport`` and its first 10000 entries are pushed, one per step,
    through a ``slideBPSky`` object ("Brute force") and then a
    ``slideUPSky`` object ("Update").  Both objects are created with the
    arguments ``(2, <instance count>, 5, [0, 1000], wsize=300)``.

    After each step the lengths of ``getSkyline()`` and ``getSkyline2()``
    are added up; the sums divided by 10000 are printed as
    ``Avg. sky1`` and ``Avg. sky2``.  Nothing is returned.
    """
    steps = 10000

    def mean_sizes(engine, stream):
        # Feed ``steps`` records to ``engine`` and sum both skyline lengths
        # as observed right after each update.
        sum1, sum2 = 0, 0
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
            sum1 += len(engine.getSkyline())
            sum2 += len(engine.getSkyline2())
        # The R-tree is removed before the averages are reported.
        engine.removeRtree()
        return sum1 / steps, sum2 / steps

    print('=== Test how instance count affect candidate skyline ===')
    for count in [3, 4, 5, 6, 7, 8, 9, 10]:
        records = batchImport(
            '10000_dim2_pos' + str(count) + '_rad5_01000.csv', count)
        print('========== instance count = ' + str(count) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, count, 5, [0, 1000], wsize=300)
        mean1, mean2 = mean_sizes(brute, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

        print('---------- Update ----------')
        updater = slideUPSky(2, count, 5, [0, 1000], wsize=300)
        mean1, mean2 = mean_sizes(updater, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))
def radius_avgsk():
    """Print average skyline sizes for radius values 3 through 10.

    For every radius the matching CSV is loaded with ``batchImport`` and
    its first 10000 entries are pushed, one per step, through a
    ``slideBPSky`` object ("Brute force") and then a ``slideUPSky`` object
    ("Update").  Both objects are created with the arguments
    ``(2, 5, <radius>, [0, 1000], wsize=300)``.

    After each step the lengths of ``getSkyline()`` and ``getSkyline2()``
    are added up; the sums divided by 10000 are printed as
    ``Avg. sky1`` and ``Avg. sky2``.  Nothing is returned.
    """
    steps = 10000

    def mean_sizes(engine, stream):
        # Feed ``steps`` records to ``engine`` and sum both skyline lengths
        # as observed right after each update.
        sum1, sum2 = 0, 0
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
            sum1 += len(engine.getSkyline())
            sum2 += len(engine.getSkyline2())
        # The R-tree is removed before the averages are reported.
        engine.removeRtree()
        return sum1 / steps, sum2 / steps

    print('=== Test how data radius affect candidate skyline ===')
    for rad in [3, 4, 5, 6, 7, 8, 9, 10]:
        records = batchImport(
            '10000_dim2_pos5_rad' + str(rad) + '_01000.csv', 5)
        print('========== radius = ' + str(rad) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, 5, rad, [0, 1000], wsize=300)
        mean1, mean2 = mean_sizes(brute, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

        print('---------- Update ----------')
        updater = slideUPSky(2, 5, rad, [0, 1000], wsize=300)
        mean1, mean2 = mean_sizes(updater, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))
# Example #3
def wsize_avgsk():
    """Print average skyline sizes for window sizes 100 through 1000.

    One CSV is loaded once with ``batchImport``.  For every window size
    its first 10000 entries are pushed, one per step, through a
    ``slideBPSky`` object ("Brute force") and then a ``slideUPSky`` object
    ("Update").  Both objects are created with the arguments
    ``(2, 5, 5, [0, 1000], wsize=<window size>)``.

    After each step the lengths of ``getSkyline()`` and ``getSkyline2()``
    are added up; the sums divided by 10000 are printed as
    ``Avg. sky1`` and ``Avg. sky2``.  Nothing is returned.
    """
    steps = 10000

    def mean_sizes(engine, stream):
        # Feed ``steps`` records to ``engine`` and sum both skyline lengths
        # as observed right after each update.
        sum1, sum2 = 0, 0
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
            sum1 += len(engine.getSkyline())
            sum2 += len(engine.getSkyline2())
        # The R-tree is removed before the averages are reported.
        engine.removeRtree()
        return sum1 / steps, sum2 / steps

    print("=== Test how window size affect candidate skyline ===")
    windows = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
    # The same data set is reused for every window size.
    records = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    for width in windows:
        print('========== window size = ' + str(width) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, 5, 5, [0, 1000], wsize=width)
        mean1, mean2 = mean_sizes(brute, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

        print('---------- Update ----------')
        updater = slideUPSky(2, 5, 5, [0, 1000], wsize=width)
        mean1, mean2 = mean_sizes(updater, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))
# Example #4
def dim_avgsk():
    """Print average skyline sizes for data dimensions 2 through 10.

    For every dimension the matching CSV is loaded with ``batchImport``
    and its first 10000 entries are pushed, one per step, through a
    ``slideBPSky`` object ("Brute force") and then a ``slideUPSky`` object
    ("Update").  Both objects are created with the arguments
    ``(<dimension>, 5, 5, [0, 1000], wsize=300)``.

    After each step the lengths of ``getSkyline()`` and ``getSkyline2()``
    are added up; the sums divided by 10000 are printed as
    ``Avg. sky1`` and ``Avg. sky2``.  Nothing is returned.
    """
    steps = 10000

    def mean_sizes(engine, stream):
        # Feed ``steps`` records to ``engine`` and sum both skyline lengths
        # as observed right after each update.
        sum1, sum2 = 0, 0
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
            sum1 += len(engine.getSkyline())
            sum2 += len(engine.getSkyline2())
        # The R-tree is removed before the averages are reported.
        engine.removeRtree()
        return sum1 / steps, sum2 / steps

    print("=== Test how dimension of data affect candidate size ===")
    for ndim in [2, 3, 4, 5, 6, 7, 8, 9, 10]:
        records = batchImport(
            '10000_dim' + str(ndim) + '_pos5_rad5_01000.csv', 5)
        print('========== Data dimension = ' + str(ndim) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(ndim, 5, 5, [0, 1000], wsize=300)
        mean1, mean2 = mean_sizes(brute, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))

        print('---------- Update ----------')
        updater = slideUPSky(ndim, 5, 5, [0, 1000], wsize=300)
        mean1, mean2 = mean_sizes(updater, records)
        print('Avg. sky1: ' + str(mean1))
        print('Avg. sky2: ' + str(mean2))
def radius_time():
    """Print the running time of both skyline objects for radius 3 to 10.

    For every radius the matching CSV is loaded with ``batchImport`` and
    its first 10000 entries are pushed, one per step, through a
    ``slideBPSky`` object ("Brute force") and then a ``slideUPSky`` object
    ("Update").  Both objects are created with the arguments
    ``(2, 5, <radius>, [0, 1000], wsize=300)``.

    The elapsed time of each run is printed as ``--- <seconds> seconds ---``.
    Object construction is not timed; the ``removeRtree()`` call is.
    Nothing is returned.
    """
    steps = 10000

    def timed_run(engine, stream):
        # perf_counter() is monotonic and high resolution, so the reported
        # duration cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
        # NOTE: R-tree removal is part of the measured interval.
        engine.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print('=== Test how data radius affect running time ===')
    for rad in [3, 4, 5, 6, 7, 8, 9, 10]:
        records = batchImport(
            '10000_dim2_pos5_rad' + str(rad) + '_01000.csv', 5)
        print('========== radius = ' + str(rad) + ' ==========\n')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, 5, rad, [0, 1000], wsize=300)
        timed_run(brute, records)

        print('---------- Update ----------')
        updater = slideUPSky(2, 5, rad, [0, 1000], wsize=300)
        timed_run(updater, records)
# Example #6
def wsize_time():
    """Print the running time of both skyline objects per window size.

    One CSV is loaded once with ``batchImport``.  For every window size
    from 100 to 1000 (step 100) its first 10000 entries are pushed, one
    per step, through a ``slideBPSky`` object ("Brute force") and then a
    ``slideUPSky`` object ("Update").  Both objects are created with the
    arguments ``(2, 5, 5, [0, 1000], wsize=<window size>)``.

    The elapsed time of each run is printed as ``--- <seconds> seconds ---``.
    Object construction is not timed; the ``removeRtree()`` call is.
    Nothing is returned.
    """
    steps = 10000

    def timed_run(engine, stream):
        # perf_counter() is monotonic and high resolution, so the reported
        # duration cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
        # NOTE: R-tree removal is part of the measured interval.
        engine.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print("=== Test how window size affect running time ===")
    windows = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
    # The same data set is reused for every window size.
    records = batchImport('10000_dim2_pos5_rad5_01000.csv', 5)
    for width in windows:
        print('========== window size = ' + str(width) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, 5, 5, [0, 1000], wsize=width)
        timed_run(brute, records)

        print('---------- Update ----------')
        updater = slideUPSky(2, 5, 5, [0, 1000], wsize=width)
        timed_run(updater, records)
# Example #7
def dim_time():
    """Print the running time of both skyline objects for dimension 2 to 10.

    For every dimension the matching CSV is loaded with ``batchImport``
    and its first 10000 entries are pushed, one per step, through a
    ``slideBPSky`` object ("Brute force") and then a ``slideUPSky`` object
    ("Update").  Both objects are created with the arguments
    ``(<dimension>, 5, 5, [0, 1000], wsize=300)``.

    The elapsed time of each run is printed as ``--- <seconds> seconds ---``.
    Object construction is not timed; the ``removeRtree()`` call is.
    Nothing is returned.
    """
    steps = 10000

    def timed_run(engine, stream):
        # perf_counter() is monotonic and high resolution, so the reported
        # duration cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
        # NOTE: R-tree removal is part of the measured interval.
        engine.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print("=== Test how dimension of data affect running time ===")
    for ndim in [2, 3, 4, 5, 6, 7, 8, 9, 10]:
        records = batchImport(
            '10000_dim' + str(ndim) + '_pos5_rad5_01000.csv', 5)
        print('========== Data dimension = ' + str(ndim) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(ndim, 5, 5, [0, 1000], wsize=300)
        timed_run(brute, records)

        print('---------- Update ----------')
        updater = slideUPSky(ndim, 5, 5, [0, 1000], wsize=300)
        timed_run(updater, records)
def instance_time():
    """Print the running time of both skyline objects per instance count.

    For every instance count from 3 to 10 the matching CSV is loaded with
    ``batchImport`` and its first 10000 entries are pushed, one per step,
    through a ``slideBPSky`` object ("Brute force") and then a
    ``slideUPSky`` object ("Update").  Both objects are created with the
    arguments ``(2, <instance count>, 5, [0, 1000], wsize=300)``.

    The elapsed time of each run is printed as ``--- <seconds> seconds ---``.
    Object construction is not timed; the ``removeRtree()`` call is.
    Nothing is returned.
    """
    steps = 10000

    def timed_run(engine, stream):
        # perf_counter() is monotonic and high resolution, so the reported
        # duration cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        started = time.perf_counter()
        for pos in range(steps):
            engine.receiveData(stream[pos])
            engine.updateSkyline()
        # NOTE: R-tree removal is part of the measured interval.
        engine.removeRtree()
        print("--- %s seconds ---" % (time.perf_counter() - started))

    print('=== Test how instance count affect running time ===')
    for count in [3, 4, 5, 6, 7, 8, 9, 10]:
        records = batchImport(
            '10000_dim2_pos' + str(count) + '_rad5_01000.csv', count)
        print('========== instance count = ' + str(count) + ' ==========')

        print('---------- Brute force ----------')
        brute = slideBPSky(2, count, 5, [0, 1000], wsize=300)
        timed_run(brute, records)

        print('---------- Update ----------')
        updater = slideUPSky(2, count, 5, [0, 1000], wsize=300)
        timed_run(updater, records)