def cluster(similarity=pearson):
    """Compute and store a distance score for every unordered pair of pins.

    Each pin is turned into a word-count vector over the distinct values
    returned by ``Words().set()``. For every pair ``(a, b)`` with ``a``
    at a lower offset than ``b`` the score ``1.0 - similarity(vec_a, vec_b)``
    is written through a fresh ``Clusters`` record (``pin_id_a``,
    ``pin_id_b``, ``score``). One progress line is printed per pair.

    Args:
        similarity: callable taking two equal-length lists of word counts
            and returning a number; defaults to ``pearson``.

    Returns:
        None. Results are persisted via ``Clusters.save()``.
    """
    # Local import so the block does not rely on the file's import header.
    from collections import Counter

    # NOTE(review): this cursor is never used below. It is kept only in case
    # constructing SQLite3 has setup side effects -- confirm, then remove.
    db = SQLite3().cursor()
    pins = Pins()
    words = Words()
    maxcount = pins.size()

    # The vocabulary is loop-invariant: build it once so every vector uses
    # the same label order instead of rebuilding the set for each pin.
    vocabulary = list(set([r[0] for r in words.set()]))

    # Number of unordered pairs, i.e. 1 + 2 + ... + (maxcount - 1).
    sum_num = maxcount * (maxcount - 1) // 2

    def word_vector(pin_id):
        # Count every word of the pin in one pass, then project the counts
        # onto the vocabulary (labels the pin does not use count as 0).
        counts = Counter(w[0] for w in words.find_by_pinid(pin_id))
        return [counts[label] for label in vocabulary]

    n = 1
    # The last pin has no partner at a higher offset, so stop one short.
    # This also makes an empty table a no-op rather than indexing into
    # whatever find_by_offset(0) returns for a missing row.
    # NOTE(review): each pin_b vector is re-read from the database for every
    # pin_a; caching vectors would trade memory for far fewer queries.
    for offset in range(maxcount - 1):
        pin_a = pins.find_by_offset(offset)
        pin_a_wordcount = word_vector(pin_a[0])

        for i in range(offset + 1, maxcount):
            pin_b = pins.find_by_offset(i)
            pin_b_wordcount = word_vector(pin_b[0])

            # Single parenthesised argument: prints the same text whether
            # ``print`` is the Python 2 statement or the print function.
            print('[%s] %s / %s calculate score of %s and %s' % (
                datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
                n, sum_num, pin_a[0], pin_b[0]))

            # Turn the similarity into a distance.
            distance = 1.0 - similarity(pin_a_wordcount, pin_b_wordcount)

            # Save the distance to the database as a cache.
            clusters = Clusters()
            clusters.data['pin_id_a'] = pin_a[0]
            clusters.data['pin_id_b'] = pin_b[0]
            clusters.data['score'] = distance
            clusters.save()
            n += 1
def save(self):
    """Persist this pin and the words attached to each of its repins.

    The pin row (``pin_id``, ``image_url``, ``description``, ``refer_url``)
    is written through ``Pins``. Then, for every user returned by
    ``get_repins()``, ``Words.delete()`` is called once and each word of
    that user is saved with the same ``pin_id`` and ``user``.

    Returns:
        0 if ``get_image_id()`` is falsy or ``Pins.save()`` is falsy;
        1 otherwise. The results of ``Words.delete()`` and ``Words.save()``
        are not checked.
    """
    pin_id = self.get_image_id()
    if not pin_id:
        return 0

    pins = Pins()
    pins.data['pin_id'] = pin_id
    pins.data['image_url'] = self.get_image()
    pins.data['description'] = self.get_description()
    pins.data['refer_url'] = self.get_refer_url()
    if not pins.save():
        return 0

    repins = self.get_repins()
    for user in repins:
        words = Words()
        words.data['pin_id'] = pin_id
        words.data['user'] = user
        # Delete and re-insert, in preparation for a change of board name.
        # (Presumably delete() removes the rows matching the pin_id/user
        # set above -- confirm against Words.)
        words.delete()
        for word in repins[user]:
            words.data['word'] = word
            words.save()
    return 1
def get_candidates(offset=0, num=20, threshold=0.5):
    """Return candidate pins as a list of ``{'pin_id': ..., 'uri': ...}``.

    The three arguments are forwarded unchanged to
    ``Pins().get_candidates``. For every row it yields, the first element
    becomes ``pin_id`` and the second becomes ``uri``, both passed
    through ``str``.
    """
    rows = Pins().get_candidates(offset, num, threshold)
    candidates = []
    for row in rows:
        candidates.append({'pin_id': str(row[0]), 'uri': str(row[1])})
    return candidates