def update_standard_tags():
    """Recompute and store the ``standard`` field of every document in ``db.tags``.

    Each tag's ``name`` is passed to ``StandardTags.transform``.  When the
    transform returns nothing, or its first candidate (``taglst[0][0]``) is
    the tag's own name, the tag is stored with an empty ``standard`` list;
    otherwise the full transform result is stored.  The document is written
    back by ``_id`` and progress is reported through ``prog_d``.

    NOTE(review): the first log line says "remaining ... without standard",
    but ``total`` is the count of *all* tags, because the query is unfiltered.
    """
    stdtag = StandardTags()
    total = db.tags.find().count()
    logging.debug('remaining %d tags without standard.', total)

    # Filter left in the original source as a comment (not applied):
    # {"$where":"this.standard == null"}, timeout=False
    cursor = db.tags.find(timeout=False)
    try:
        for i, t in enumerate(cursor):
            name = t['name']
            taglst = stdtag.transform(name)
            if not taglst or taglst[0][0] == name:
                logging.debug('skip tag: %s', name)
                t['standard'] = []
            else:
                t['standard'] = taglst
                logging.debug('%d, tag update standard %s --> %s',
                              i, name, taglst[0][0])

            ret = db.tags.update({"_id": t['_id']}, t)
            # ``update`` returns None when write acknowledgement is disabled;
            # only a returned status document can report a failure.
            if ret is not None and not ret['ok']:
                logging.warning('tag update failed. tag: %s', name)
            prog_d('tag update', i, total)
    finally:
        # A no-timeout cursor is not reaped by the server, so close it
        # explicitly even when the loop body raises.
        cursor.close()
# NOTE(review): the next line is the tail of a definition whose beginning is
# outside this view; it is kept verbatim rather than reconstructed.
# prog_d('solve Matrix row', c, total) # c += 1 return MImatrix


# NOTE(review): takes ``self`` and reads ``self.root`` -- presumably a method
# of a class whose header is outside this view; confirm its nesting.
def _calMIvalue(self, a_set, b_set):
    """Return a mutual-information score for two sets, scaled by mean entropy.

    Probabilities are set sizes divided by ``self.root``: ``pab`` for the
    intersection and ``pa`` / ``pb`` for each set.  The score is
    ``pab * log((pab + 1) / (pa * pb))`` divided by ``(Ha + Hb) / 2``, where
    ``Hx = -px * log(px)``.

    Raises:
        ZeroDivisionError: if ``self.root`` is zero, if either set is empty
            (``pa * pb`` is zero), or if both entropy terms are zero.
    """
    p_joint = math.fabs(float(len(a_set & b_set)) / self.root)
    p_a = math.fabs(float(len(a_set)) / self.root)
    p_b = math.fabs(float(len(b_set)) / self.root)

    # Computed before the entropies so that an empty set fails on the
    # division here, exactly as it always has.
    mutual_info = p_joint * math.log((p_joint + 1) / (p_a * p_b))

    entropy_a = -p_a * math.log(p_a)
    entropy_b = -p_b * math.log(p_b)
    mean_entropy = (entropy_a + entropy_b) / 2
    return mutual_info / mean_entropy


rsdb = RecsysDatabase()
stdtag = StandardTags()


# set PROG before using this function
def prog_d(dstr, line=-1, total=100):
    """Log a progress line, or a completion line, for a long-running task.

    Args:
        dstr: Label of the task being reported.
        line: Index of the item being processed.  A negative value (the
            default) logs the "Finishing" message instead of progress.
        total: Number of items overall.

    With ``line >= 0`` the percentage is ``int(line / total * 100 + 1)``.  A
    message is logged only when that value is in ``PROG_SCALE`` and differs
    from ``PROG_REC`` (the last value reported), which is then updated.
    A ``total`` of zero is ignored instead of raising ``ZeroDivisionError``.
    """
    global PROG_REC

    if line < 0:
        logging.info('-=-=- Finishing %s', dstr)
        return

    if total == 0:
        # No denominator to measure progress against; nothing to report.
        return

    progress = int(float(line) / float(total) * 100 + 1)
    if progress not in PROG_SCALE or progress == PROG_REC:
        return
    # print progress
    PROG_REC = progress
    logging.info('-=-=- Processing %s %d%%(%d/%d) -=-=-',
                 dstr, progress, line, total)