def update_standard_tags():
    """Recompute and store the ``standard`` field of every document in ``db.tags``.

    Each tag name is passed through ``StandardTags.transform``.  When the
    transform yields nothing, or its first entry's first element equals the
    tag's own name, the tag gets an empty ``standard`` list; otherwise the
    whole transform result is stored.  The document is then written back
    and progress is reported through ``prog_d``.
    """
    stdtag = StandardTags()
    # NOTE: no filter is applied, so this counts every tag document, not only
    # the ones still lacking a ``standard`` field.
    total = db.tags.find().count()
    logging.debug('remaining %d tags without standard.', total)
    # A narrower query was once sketched here:
    #   {"$where": "this.standard == null"}, timeout=False
    # timeout=False presumably disables the server-side cursor timeout for
    # this long-running scan -- confirm against the pymongo version in use.
    for i, t in enumerate(db.tags.find(timeout=False)):
        name = t['name']
        taglst = stdtag.transform(name)
        if not taglst or taglst[0][0] == name:
            # Nothing to map to, or the tag maps to itself.
            logging.debug('skip tag: %s', name)
            t['standard'] = []
        else:
            t['standard'] = taglst
            logging.debug('%d, tag update standard %s --> %s',
                          i, name, taglst[0][0])
        # Replace the stored document with the modified copy.
        ret = db.tags.update({"_id": t['_id']}, t)
        if not ret['ok']:
            # logging.warn is a deprecated alias; logging.warning is the
            # supported spelling.
            logging.warning('tag update failed. tag: %s', name)
        prog_d('tag update', i, total)
Esempio n. 2
0
                # prog_d('solve Matrix row', c, total)
                # c += 1
            
        return MImatrix

    def _calMIvalue(self, a_set, b_set):
        """Return the overlap score of two sets, normalized by mean entropy.

        Set sizes are converted to proportions of ``self.root``.  The
        numerator is ``p_ab * log((p_ab + 1) / (p_a * p_b))`` and the
        denominator is the mean of the two entropy-style terms
        ``-p * log(p)``.

        Raises ``ZeroDivisionError`` when ``p_a * p_b`` is zero (e.g. an
        empty input set) or when both entropy terms are zero.
        """
        scale = self.root

        # Proportions of the joint and of each individual set.
        p_joint = math.fabs(float(len(a_set & b_set)) / scale)
        p_first = math.fabs(float(len(a_set)) / scale)
        p_second = math.fabs(float(len(b_set)) / scale)

        mutual = p_joint * math.log((p_joint + 1) / (p_first * p_second))

        entropy_first = -p_first * math.log(p_first)
        entropy_second = -p_second * math.log(p_second)
        mean_entropy = (entropy_first + entropy_second) / 2

        return mutual / mean_entropy

# Module-level instances, constructed once when this module is imported.
# The database handle is created first, then the tag standardizer.
rsdb = RecsysDatabase()
stdtag = StandardTags()

# The module-level globals PROG_SCALE and PROG_REC must be defined before
# this function is called with a non-negative ``line``.
def prog_d(dstr, line=-1, total=100):
    """Log a throttled progress message, or a finishing message.

    dstr  -- label describing the task being reported.
    line  -- index of the current item; a negative value (the default)
             logs a "Finishing" message instead of a progress update.
    total -- number of items overall, used to derive the percentage.

    A progress message is emitted only when the computed percentage is a
    member of PROG_SCALE and differs from the last reported value, which
    is remembered in the global PROG_REC.
    """
    global PROG_REC

    if line < 0:
        logging.info('-=-=- Finishing ' + dstr)
        return

    fraction = float(line) / float(total)
    percent = int(fraction * 100 + 1)

    # Stay quiet unless this percentage is a reporting step that has not
    # already been announced.
    if not (percent in PROG_SCALE and percent != PROG_REC):
        return

    PROG_REC = percent
    message = dstr + ' %d%%(%d/%d) -=-=-' % (percent, line, total)
    logging.info('-=-=- Processing ' + message)