Exemple #1
0
def _word_count_vector(words, pin_id, labels):
    """Return how often each entry of *labels* occurs among a pin's words.

    The words are read with ``words.find_by_pinid(pin_id)`` (first column of
    each row) and tallied in a single pass; labels that never occur get 0.
    """
    from collections import Counter

    counts = Counter(row[0] for row in words.find_by_pinid(pin_id))
    return [counts[label] for label in labels]


def cluster(similarity=pearson):
    """Score every unordered pair of pins and save each score via ``Clusters``.

    Every pin is turned into a word-count vector over the distinct values
    returned by ``Words().set()``.  For each pair (a, b) with a before b in
    offset order, ``1.0 - similarity(vector_a, vector_b)`` is stored as the
    ``score`` of a new ``Clusters`` record, and a progress line is printed.

    Args:
        similarity: callable taking two equal-length lists of counts and
            returning a number.  Defaults to ``pearson``.

    Returns:
        None.  With fewer than two pins nothing is read or written.
    """
    # NOTE(review): ``db`` is never used below.  Kept as-is because creating
    # the cursor may have side effects (e.g. opening the connection) that are
    # not visible from this function -- confirm before removing.
    db = SQLite3().cursor()
    pins = Pins()
    words = Words()
    maxcount = pins.size()
    # De-duplicate the vocabulary once; both vectors of a pair must share the
    # same label order, so this list is reused for every pin.
    labels = list(set(r[0] for r in words.set()))
    # Total number of pairs, shown as the denominator of the progress output.
    sum_num = sum(range(1, maxcount))
    n = 1
    # The last pin has no later partner, so the outer loop stops one short.
    # This also makes an empty table a no-op instead of a failed lookup.
    for offset in range(maxcount - 1):
        pin_a = pins.find_by_offset(offset)
        pin_a_wordcount = _word_count_vector(words, pin_a[0], labels)
        for i in range(offset + 1, maxcount):
            pin_b = pins.find_by_offset(i)
            pin_b_wordcount = _word_count_vector(words, pin_b[0], labels)
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print('[%s] %s / %s calculate score of %s and %s' % (
                datetime.today().strftime('%Y-%m-%d %H:%M:%S'), n, sum_num,
                pin_a[0], pin_b[0]))
            sim = 1.0 - similarity(pin_a_wordcount, pin_b_wordcount)
            # Save the score to the database so it can be reused later.
            clusters = Clusters()
            clusters.data['pin_id_a'] = pin_a[0]
            clusters.data['pin_id_b'] = pin_b[0]
            clusters.data['score'] = sim
            clusters.save()
            n += 1
Exemple #2
0
#!/usr/bin/python
import os, sys, cgi, json, site

site.addsitedir(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../core'))

from db.mapper import Pins, Clusters

# Parse the CGI query string; without one there are simply no parameters.
if 'QUERY_STRING' in os.environ:
    query = cgi.parse_qs(os.environ['QUERY_STRING'])
else:
    query = {}

# parse_qs maps each parameter name to a list of values and drops blank
# values by default, so a missing or empty pin_id has no entry at all.
pin_ids = query.get('pin_id')

if not pin_ids:
    # Answer with an explicit client error instead of dying on a KeyError.
    status = "Status: 400 Bad Request\n"
    res = {'error': 'missing required parameter: pin_id'}
else:
    status = ""
    clusters = Clusters()
    # NOTE(review): 50 and 0.5 are presumably a result limit and a score
    # threshold -- confirm against Clusters.get_top_matches.
    pins = clusters.get_top_matches(pin_ids[0], 50, 0.5)
    # Columns 0, 2 and 3 of each row are emitted as score, uri and pin_id.
    res = [{'score': str(pin[0]), 'uri': str(pin[2]), 'pin_id': str(pin[3])}
           for pin in pins]

# Exactly one blank line separates the headers from the body; writing through
# sys.stdout avoids the extra newline a print of "...\n\n" would append.
sys.stdout.write(status + "Content-Type: application/json\n\n")
sys.stdout.write(json.dumps(res) + "\n")