Esempio n. 1
0
def main():
    t = {}
    for k,v in NAME2ID.iteritems():
        t[name_tidy(k)] = v
    print "#coding: utf-8"
    print "NAME2ID = ",
    pprint(t)
Esempio n. 2
0
def main():
    t = {}
    for k, v in NAME2ID.iteritems():
        t[name_tidy(k)] = v
    print "#coding: utf-8"
    print "NAME2ID = ",
    pprint(t)
Esempio n. 3
0
def main():
    id2alias = {}
    for zhihu_topic in ZHIHU_TOPIC:
        id = zhihu_topic[0]
        alias_list = zhihu_topic[5]
        id2alias[int(id)] = alias_list


    name2id = {}

    for i in Zsite.where(cid=CID_TAG):
        tag_list = map(str.strip, i.name.split("/"))
        zhihu_id = MY2ID[i.id]
        alias_list = id2alias.get(zhihu_id,())

        tag_list.extend(alias_list)
       
        for name in tag_list:
            name2id[name.lower()] = i.id

    print """
#coding:utf-8

NAME2ID = """,
    pprint(name2id)
Esempio n. 4
0
#    NAME_RANK[name] = rank
#    if _name != name:
#        NAME_RANK[_name] = name

# MY2Z maps a Zsite id to the NAME_ID value found for its name.
MY2Z = {}
# Number of tags for which no NAME_ID entry was found.
count = 0

for site in Zsite.where(cid=CID_TAG):
    # Persist the whitespace-stripped name before matching on it.
    site.name = site.name.strip()
    site.save()

    if site.name in NAME_ID:
        # The full name matches directly.
        MY2Z[site.id] = NAME_ID[site.name]
    else:
        # Otherwise take the first "/"-separated part that is known.
        for part in map(str.strip, site.name.split("/")):
            if part in NAME_ID:
                MY2Z[site.id] = NAME_ID[part]
                break

    if site.id not in MY2Z:
        count += 1
        #print count, '%s|'%site.name

import _env
from zkit.pprint import pprint
from yajl import dumps

# Invert MY2Z (value -> Zsite id) and pretty-print the result.
z2my = dict((target, site_id) for site_id, target in MY2Z.iteritems())
pprint(z2my)
#print dumps(NAME_KEYWORD)
#print dumps(NAME_RANK)


Esempio n. 5
0
sp2id = defaultdict(list)

for k, v in NAME2ID.iteritems():
    for i in sp_txt(k):
        sp2id[i].append(k)

word_parent = defaultdict(set)

for k, v in NAME2ID.iteritems():
    for i in sp_txt(k):
        for j in sp2id[i]:
            if j != k and k in j:
                #print k, j
                word_parent[NAME2ID[j]].add(NAME2ID[k])

id2name = dict((k, v) for v, k in NAME2ID.iteritems())

#for id, pid_list in word_parent.iteritems():
#    print id2name[id]
#    for i in pid_list:
#        print id2name[i],
#    print "\n" 

word_parent = dict((k, tuple(v)) for k, v in word_parent.iteritems())

print 'PTAG = ',
pprint(word_parent)


Esempio n. 6
0
#coding:utf-8
import _env
from zkit.google.greader import Reader
from config import GREADER_USERNAME, GREADER_PASSWORD
from zkit.pprint import pprint

reader = Reader(GREADER_USERNAME, GREADER_PASSWORD)

# Pretty-print every entry of the feed and keep a trimmed copy of it
# (six fields only) in `result`.
result = []
for feed in reader.feed("feed/http://book.douban.com/feed/review/book"):
    pprint(feed)
    # Fields are read in this order; a missing one raises KeyError.
    data = dict(
        (field, feed[field])
        for field in ('title', 'author', 'content', 'updated', 'id', 'published')
    )
    result.append(data)

if __name__ == "__main__":
    pass
Esempio n. 7
0
#"""
RESULT = {}
for tag in Zsite.where(cid=CID_TAG):
    name_list = map(str.strip, tag.name.split('/'))
    for name in name_list:
        RESULT[name.replace('·', '.').lower()] = tag.id

for i in ZHIHU_TOPIC:
    id = i[0]
    if id not in ID2MY:
        continue
    rename = i[-1]
    for name in rename:
        name = name.lower()
        if name in RESULT and RESULT[name] != ID2MY[id]:
            zsite = Zsite.mc_get(ID2MY[id])
            print zsite.id, zsite.name
            zsite = Zsite.mc_get(RESULT[name])
            print zsite.id, zsite.name
        else:
            continue


print """
#coding:utf-8
TAG2ID = 
""",
pprint(RESULT)


#coding:utf-8
import _env
from zkit.google.greader import Reader
from config import GREADER_USERNAME, GREADER_PASSWORD
from zkit.pprint import pprint

reader = Reader(GREADER_USERNAME, GREADER_PASSWORD)

# Walk the feed, pretty-printing each entry and collecting a reduced
# dict of six fields per entry into `result`.
result = []
for feed in reader.feed("feed/http://book.douban.com/feed/review/book"):
    pprint(feed)
    # Keys are copied in this order; an absent key raises KeyError.
    wanted = ('title', 'author', 'content', 'updated', 'id', 'published')
    data = dict((key, feed[key]) for key in wanted)
    result.append(data)


if __name__ == "__main__":
    pass



Esempio n. 9
0
#    from zkit.pprint import pprint
#
#    pprint(url2id)
#"""
RESULT = {}
for tag in Zsite.where(cid=CID_TAG):
    name_list = map(str.strip, tag.name.split('/'))
    for name in name_list:
        RESULT[name.replace('·', '.').lower()] = tag.id

for i in ZHIHU_TOPIC:
    id = i[0]
    if id not in ID2MY:
        continue
    rename = i[-1]
    for name in rename:
        name = name.lower()
        if name in RESULT and RESULT[name] != ID2MY[id]:
            zsite = Zsite.mc_get(ID2MY[id])
            print zsite.id, zsite.name
            zsite = Zsite.mc_get(RESULT[name])
            print zsite.id, zsite.name
        else:
            continue

print """
#coding:utf-8
TAG2ID = 
""",
pprint(RESULT)
Esempio n. 10
0
from collections import defaultdict
from zkit.pprint import pprint

sp2id = defaultdict(list)

for k, v in NAME2ID.iteritems():
    for i in sp_txt(k):
        sp2id[i].append(k)

word_parent = defaultdict(set)

for k, v in NAME2ID.iteritems():
    for i in sp_txt(k):
        for j in sp2id[i]:
            if j != k and k in j:
                #print k, j
                word_parent[NAME2ID[j]].add(NAME2ID[k])

id2name = dict((k, v) for v, k in NAME2ID.iteritems())

#for id, pid_list in word_parent.iteritems():
#    print id2name[id]
#    for i in pid_list:
#        print id2name[i],
#    print "\n"

word_parent = dict((k, tuple(v)) for k, v in word_parent.iteritems())

print 'PTAG = ',
pprint(word_parent)