Example #1
0
def feed_import_new(zsite_id, rid, title, txt, url, rank):
    """Store one imported feed entry unless its text is a known duplicate.

    ``title`` is passed through ``unescape`` and ``utf8_ftoj``; ``txt`` is
    passed through ``format_txt`` and ``utf8_ftoj``. If
    ``import_feed_duplicator`` reports the resulting text as a duplicate,
    nothing is stored and ``None`` is returned.

    Otherwise a ``FeedImport`` row is saved with an empty tag list and state
    ``FEED_IMPORT_STATE_WITHOUT_TAG``, the text is registered with the
    duplicator under the new row id, and, when the feed user found for
    ``(zsite_id, rid)`` has a truthy ``user_id``, ``feed_import_user_new`` is
    called for that user. Returns the saved ``FeedImport`` object.
    """
    title = utf8_ftoj(unescape(title))
    txt = utf8_ftoj(format_txt(txt))
    if import_feed_duplicator.txt_is_duplicate(txt):
        return

    feed_user = user_by_feed_id_zsite_id(zsite_id, rid)
    # 0 is stored when no feed user matches this (zsite_id, rid) pair
    po_meta_user_id = feed_user.id if feed_user else 0

    fields = dict(
        title=title,
        txt=txt,
        zsite_id=zsite_id,
        rid=rid,
        url=url,
        tag_id_list='',
        state=FEED_IMPORT_STATE_WITHOUT_TAG,
        rank=rank,
        po_meta_user_id=po_meta_user_id,
    )
    feed = FeedImport(**fields)
    feed.save()

    feed_id = feed.id
    import_feed_duplicator.set_record(txt, feed_id)

    linked_user_id = feed_user.user_id if feed_user else None
    if linked_user_id:
        feed_import_user_new(linked_user_id, feed_id)

    return feed
Example #2
0
def train(filename, parser):
    """Build and cache per-word tag weights for one training file.

    Does nothing when a cache file named ``basename(filename)`` already exists
    under ``CACHE_PATH``. Otherwise iterates ``parser(filename)``, which must
    yield ``(tag_id_list, txt)`` pairs, accumulates a nested mapping
    ``{word: {tag_id: weight}}`` and writes it to the cache path with
    ``tofromfile.tofile``.

    Records with blank text or without tags are skipped. Each record's tag set
    is widened with the entries ``PTAG`` holds for its own tags (one level
    only), and a word occurring ``n`` times in a record adds ``1 + log(n)`` to
    every tag in that widened set.
    """
    cache_path = join(CACHE_PATH, basename(filename))
    if exists(cache_path):
        return

    word2tag_count = {}
    for tag_id_list, txt in parser(filename):
        if not txt.strip():
            continue

        tag_id_set = set(tag_id_list)
        if not tag_id_set:
            continue

        # Iterate a snapshot so the set is not mutated while being walked
        for tid in tuple(tag_id_set):
            tag_id_set.update(PTAG.get(tid, ()))

        word2count = defaultdict(int)
        for word in seg_txt(utf8_ftoj(str(txt))):
            word2count[word] += 1

        # .items() is used because .iteritems() exists only on Python 2
        for word, count in word2count.items():
            # The weight depends only on the word count, not on the tag
            weight = 1 + log(float(count))
            tag2weight = word2tag_count.setdefault(word, {})
            for tag_id in tag_id_set:
                tag2weight[tag_id] = tag2weight.get(tag_id, 0) + weight

    tofromfile.tofile(cache_path, word2tag_count)
Example #3
0
def train(filename, parser):
    """Build and cache per-word tag weights for one training file.

    Does nothing when a cache file named ``basename(filename)`` already exists
    under ``CACHE_PATH``. Otherwise iterates ``parser(filename)``, which must
    yield ``(tag_id_list, txt)`` pairs, accumulates a nested mapping
    ``{word: {tag_id: weight}}`` and writes it to the cache path with
    ``tofromfile.tofile``.

    Records with blank text or without tags are skipped. Each record's tag set
    is widened with the entries ``PTAG`` holds for its own tags (one level
    only), and a word occurring ``n`` times in a record adds ``1 + log(n)`` to
    every tag in that widened set.
    """
    cache_path = join(CACHE_PATH, basename(filename))
    if exists(cache_path):
        return

    word2tag_count = {}
    for tag_id_list, txt in parser(filename):
        if not txt.strip():
            continue

        tag_id_set = set(tag_id_list)
        if not tag_id_set:
            continue

        # Iterate a snapshot so the set is not mutated while being walked
        for tid in tuple(tag_id_set):
            tag_id_set.update(PTAG.get(tid, ()))

        word2count = defaultdict(int)
        for word in seg_txt(utf8_ftoj(str(txt))):
            word2count[word] += 1

        # .items() is used because .iteritems() exists only on Python 2
        for word, count in word2count.items():
            # The weight depends only on the word count, not on the tag
            weight = 1 + log(float(count))
            tag2weight = word2tag_count.setdefault(word, {})
            for tag_id in tag_id_set:
                tag2weight[tag_id] = tag2weight.get(tag_id, 0) + weight

    tofromfile.tofile(cache_path, word2tag_count)
def feed_import_new(zsite_id, rid, title, txt, url, rank):
    """Store one imported feed entry unless its text is a known duplicate.

    ``title`` is passed through ``unescape`` and ``utf8_ftoj``; ``txt`` is
    passed through ``format_txt`` and ``utf8_ftoj``. If
    ``import_feed_duplicator`` reports the resulting text as a duplicate,
    nothing is stored and ``None`` is returned.

    Otherwise a ``FeedImport`` row is saved with an empty tag list and state
    ``FEED_IMPORT_STATE_WITHOUT_TAG``, the text is registered with the
    duplicator under the new row id, and, when the feed user found for
    ``(zsite_id, rid)`` has a truthy ``user_id``, ``feed_import_user_new`` is
    called for that user. Returns the saved ``FeedImport`` object.
    """
    title = utf8_ftoj(unescape(title))
    txt = utf8_ftoj(format_txt(txt))
    if import_feed_duplicator.txt_is_duplicate(txt):
        return

    feed_user = user_by_feed_id_zsite_id(zsite_id, rid)
    # 0 is stored when no feed user matches this (zsite_id, rid) pair
    po_meta_user_id = feed_user.id if feed_user else 0

    fields = dict(
        title=title,
        txt=txt,
        zsite_id=zsite_id,
        rid=rid,
        url=url,
        tag_id_list='',
        state=FEED_IMPORT_STATE_WITHOUT_TAG,
        rank=rank,
        po_meta_user_id=po_meta_user_id,
    )
    feed = FeedImport(**fields)
    feed.save()

    feed_id = feed.id
    import_feed_duplicator.set_record(txt, feed_id)

    linked_user_id = feed_user.user_id if feed_user else None
    if linked_user_id:
        feed_import_user_new(linked_user_id, feed_id)

    return feed
Example #5
0
def name_tidy(name):
    """Return a normalized form of *name*.

    The value is coerced with ``str``, passed through ``utf8_ftoj`` and
    lower-cased. "·" becomes a space, the "《" / "》" title marks are removed,
    and the result is cut at the first opening parenthesis so that any
    parenthesized suffix is dropped.
    """
    text = utf8_ftoj(str(name)).lower()
    text = text.replace("·", " ").replace("《", "").replace("》", "")
    # Fold the full-width "（" into "(" so both forms are cut by the split
    text = text.replace("（", "(")
    return text.split("(", 1)[0]
Example #6
0
def name_tidy(name):
    """Return a normalized form of *name*.

    The value is coerced with ``str``, passed through ``utf8_ftoj`` and
    lower-cased. "·" becomes a space, the "《" / "》" title marks are removed,
    and the result is cut at the first opening parenthesis so that any
    parenthesized suffix is dropped.
    """
    text = utf8_ftoj(str(name)).lower()
    text = text.replace("·", " ").replace("《", "").replace("》", "")
    # Fold the full-width "（" into "(" so both forms are cut by the split
    text = text.replace("（", "(")
    return text.split("(", 1)[0]
Example #7
0
def txt2word(txt):
    """Return the ``seg_txt`` result for a normalized copy of *txt*.

    The text is lower-cased, coerced with ``str`` and passed through
    ``utf8_ftoj`` before being handed to ``seg_txt``; whatever ``seg_txt``
    returns is passed back unchanged.
    """
    lowered = str(txt.lower())
    converted = utf8_ftoj(lowered)
    return seg_txt(converted)
Example #8
0
def txt2word(txt):
    """Return the ``seg_txt`` result for a normalized copy of *txt*.

    The text is lower-cased, coerced with ``str`` and passed through
    ``utf8_ftoj`` before being handed to ``seg_txt``; whatever ``seg_txt``
    returns is passed back unchanged.
    """
    lowered = str(txt.lower())
    converted = utf8_ftoj(lowered)
    return seg_txt(converted)