Example #1
0
def testfull():
    """
    Tests the fully enumerated Fisher-Jenks implementation.

    Sweeps every combination of core count, data size, and class count,
    timing each classification run. Each run is guarded because large
    inputs are expected to exhaust RAM at some point.
    """
    cores = [1, 2, 4, 16, 32]
    classes = [5, 6, 7]
    data_sizes = [
        500, 1000, 2500, 5000, 7500, 10000, 12500, 15000, 17500, 20000, 22500,
        25000
    ]

    for c in cores:
        for d in data_sizes:
            for k in classes:
                data = np.random.ranf(size=d)
                try:
                    t1 = time.time()
                    # wrapped in try since we will blow out RAM at some point
                    classification = fisher_jenks(data, k, c)
                    t2 = time.time()
                    # single-argument print(...) is valid in both Py2 and Py3
                    print("Processed {0} data points in {1} classes using {2} cores. Total time: {3}".format(
                        d, k, c, t2 - t1))
                    data = None  # release the array before the next allocation
                except KeyboardInterrupt:
                    print("Aborting")
                    sys.exit(1)
                except Exception:
                    # was a bare `except:`, which also swallows SystemExit;
                    # narrowed so deliberate exits still propagate
                    print("FAILURE: {0} data points.".format(d))
Example #2
0
def testfull():
    """
    Tests the fully enumerated Fisher-Jenks implementation
    """
    core_counts = [1, 2, 4, 16, 32]
    class_counts = [5, 6, 7]
    sizes = [500, 1000, 2500, 5000, 7500, 10000, 12500, 15000,
             17500, 20000, 22500, 25000]

    for num_cores in core_counts:
        for size in sizes:
            for num_classes in class_counts:
                data = np.random.ranf(size=size)
                try:
                    # Guarded: sufficiently large inputs will exhaust RAM.
                    started = time.time()
                    classification = fisher_jenks(data, num_classes, num_cores)
                    elapsed = time.time() - started
                    print("Processed {0} data points in {1} classes using {2} cores. Total time: {3}".format(size, num_classes, num_cores, elapsed))
                    data = None
                except KeyboardInterrupt:
                    print("Aborting")
                    sys.exit(1)
                except:
                    print("FAILURE: {0} data points.".format(size))
Example #3
0
def testsample():
    """
    Tests the sampled Fisher-Jenks implementation.

    For every (cores, data size, classes) combination: generates random
    data, saves it to disk, re-opens it as a memory-mapped array, draws a
    random sample of roughly sqrt(n) observations (capped at 40000), and
    times the classification of that sample.
    """
    cores = [1, 2, 4, 16, 32]
    classes = [5, 6, 7]
    data_sizes = [
        10000, 20000, 40000, 80000, 160000, 320000, 640000, 1280000, 2560000,
        5120000, 10240000, 20480000, 40960000, 81920000, 163840000, 327680000,
        655360000
    ]
    for c in cores:
        for d in data_sizes:
            for k in classes:
                # Generate the test data and save to disk
                data = np.random.ranf(size=d)
                nobs = len(data)
                np.save('testfile.npy', data)
                data = None  # drop the in-memory copy before mmapping

                # Sample size is the sqrt of nobs, capped at 40000 so the
                # classification run stays bounded (renamed from `sqrt`,
                # which read as the math function rather than a size)
                sample_size = math.sqrt(nobs)
                if sample_size > 40000:
                    sample_size = 40000
                pct = sample_size / float(d)

                # Load the data back into memory as a mmapped file
                f = np.load('testfile.npy', mmap_mode='r+')
                t1 = time.time()
                data = fj_generate_sample(f, pct=pct)
                t2 = time.time()
                # single-argument print(...) is valid in both Py2 and Py3
                print("Randomly sampling {0} percent of {1} observations for a run size of {2} observations took {3} seconds.".format(
                    pct, nobs, sample_size, t2 - t1))
                try:
                    t1 = time.time()
                    # wrapped in try since we will blow out RAM at some point
                    classification = fisher_jenks(data, k, c)
                    t2 = time.time()
                    print("Processed {0} data points in {1} classes using {2} cores. Total time: {3}".format(
                        d, k, c, t2 - t1))
                except KeyboardInterrupt:
                    print("Aborting")
                    sys.exit(1)
                except Exception:
                    # was a bare `except:`, which also swallows SystemExit;
                    # narrowed so deliberate exits still propagate
                    print("FAILURE: {0} data points.".format(d))
Example #4
0
import sys
import time
import numpy
from fj_refactored import fisher_jenks

cores = [1,2,4,16,32]
classes = [5,6,7]
data_sizes = [500, 1000, 2500, 5000, 7500, 10000]

# Benchmark fisher_jenks over every (cores, size, classes) combination,
# timing each run.
for c in cores:
    for d in data_sizes:
        for k in classes:
            data = numpy.random.ranf(size=d)
            try:
                t1 = time.time()
                #wrapped in try since we will blow out RAM at some point
                classification = fisher_jenks(data, k, c)
                t2 = time.time()
                # single-argument print(...) is valid in both Py2 and Py3
                print("Processed {0} data points in {1} classes using {2} cores. Total time: {3}".format(d, k, c, t2-t1))
            except KeyboardInterrupt:
                print("Aborting")
                sys.exit(1)
            except Exception:
                # was a bare `except:`; narrowed so SystemExit propagates
                print("FAILURE: {0} data points.".format(d))
Example #5
0
import sys
import time
import numpy
from fj_refactored import fisher_jenks

cores = [1, 2, 4, 16, 32]
classes = [5, 6, 7]
data_sizes = [500, 1000, 2500, 5000, 7500, 10000]

# Time a fisher_jenks run for every core/size/class combination.
for num_cores in cores:
    for size in data_sizes:
        for num_classes in classes:
            data = numpy.random.ranf(size=size)
            try:
                # guarded: big enough inputs will blow out RAM
                started = time.time()
                classification = fisher_jenks(data, num_classes, num_cores)
                finished = time.time()
                print("Processed {0} data points in {1} classes using {2} cores. Total time: {3}".format(
                    size, num_classes, num_cores, finished - started))
            except KeyboardInterrupt:
                print("Aborting")
                sys.exit(1)
            except:
                print("FAILURE: {0} data points.".format(size))
Example #6
0
def testsample():
    """
    Tests the sampled Fisher-Jenks implementation
    """
    cores = [1, 2, 4, 16, 32]
    classes = [5, 6, 7]
    data_sizes = [10000, 20000, 40000, 80000, 160000, 320000, 640000,
                  1280000, 2560000, 5120000, 10240000, 20480000, 40960000,
                  81920000, 163840000, 327680000, 655360000]
    for num_cores in cores:
        for size in data_sizes:
            for num_classes in classes:
                # Create the test data and write it to disk so it can be
                # re-opened as a memory map.
                data = np.random.ranf(size=size)
                nobs = len(data)
                np.save("testfile.npy", data)
                data = None

                # Sample roughly sqrt(nobs) observations, capped at 40000.
                sample_size = math.sqrt(nobs)
                if sample_size > 40000:
                    sample_size = 40000
                pct = sample_size / float(size)

                # Re-open the saved array as a memory-mapped file.
                mmapped = np.load("testfile.npy", mmap_mode="r+")
                started = time.time()
                data = fj_generate_sample(mmapped, pct=pct)
                finished = time.time()
                print("Randomly sampling {0} percent of {1} observations for a run size of {2} observations took {3} seconds.".format(
                    pct, nobs, sample_size, finished - started))
                try:
                    # guarded: large runs are expected to exhaust RAM
                    started = time.time()
                    classification = fisher_jenks(data, num_classes, num_cores)
                    finished = time.time()
                    print("Processed {0} data points in {1} classes using {2} cores. Total time: {3}".format(
                        size, num_classes, num_cores, finished - started))
                except KeyboardInterrupt:
                    print("Aborting")
                    sys.exit(1)
                except:
                    print("FAILURE: {0} data points.".format(size))