Beispiel #1
0
def k_distance(dataset, k, sd_away, distance=proximity.euclidean):
    ''' compute the k-distance as radius for dbscan.
        k-distance is obtained by the min. distance immediately
        larger than a particular standard deviation away from mean '''

    global cached
    if not cached:
        proximity.build_cache(dataset, distance)
        cached = True

    kdist = [pairs[k][1] for pairs in [proximity.cache[a] for a in dataset]]
    kdist.sort()

    size = len(kdist)
    mean = sum(kdist) / float(size)
    diff = [(d - mean)**2 for d in kdist]
    sd = math.sqrt(sum(diff) / float(size - 1))

    anchor = mean + (sd * sd_away)

    print 'mean:', mean, 'sd:', sd, 'sd + mean:', anchor,

    last = None
    for d in reversed(kdist):
        if d > anchor:
            last = d
        if d < anchor and last is not None:
            print 'kdist:', last
            return last
    raise Exception('sd too far away from mean')
Beispiel #2
0
def k_distance(dataset, k, sd_away, distance=proximity.euclidean):
    ''' compute the k-distance as radius for dbscan.
k-distance is obtained by the min. distance immediately
larger than a particular standard deviation away from mean '''

    global cached
    if not cached:
        proximity.build_cache(dataset, distance)
        cached = True

    kdist = [
        pairs[k][1] for pairs in
        [proximity.cache[a] for a in dataset]
    ]
    kdist.sort()

    size = len(kdist)
    mean = sum(kdist) / float(size)
    diff = [(d - mean) ** 2 for d in kdist]
    sd = math.sqrt(sum(diff) / float(size - 1))

    anchor = mean + (sd * sd_away)

    print 'mean:', mean, 'sd:', sd, 'sd + mean:', anchor,

    last = None
    for d in reversed(kdist):
        if d > anchor:
            last = d
        if d < anchor and last is not None:
            print 'kdist:', last
            return last
    raise Exception('sd too far away from mean')
Beispiel #3
0
def find_neighbour(instance, dataset, radius, distance):
    ''' return the cached neighbours of instance whose distance is at
        most radius.

        dataset and distance are only handed to proximity.build_cache,
        and only while the module-level `cached` flag is not yet set. '''
    global cached
    if not cached:
        proximity.build_cache(dataset, distance)
        cached = True

    within = []
    # each cache entry is unpacked as a (neighbour, distance) pair
    for candidate, gap in proximity.cache[instance]:
        if radius >= gap:
            within.append(candidate)
    return within
Beispiel #4
0
def find_neighbour(instance, dataset, radius, distance):
    ''' list every neighbour of instance lying within radius (inclusive).

        the lookup goes through proximity.cache; the cache is built from
        dataset and distance on the first call, i.e. while the
        module-level `cached` flag is still false. '''
    global cached
    if not cached:
        proximity.build_cache(dataset, distance)
        cached = True

    # entries of proximity.cache[instance] are (neighbour, distance) pairs
    return [point for point, span in proximity.cache[instance]
            if radius >= span]