예제 #1
0
파일: cnanalyzer.py 프로젝트: fay/wt
def c():
    """Categorise every indexed document and store the result on its Entry.

    Walks index positions ``0 .. len(Entry.objects.all()) - 1``, reads the
    document at each position from the reader opened on ``STORE_DIR``,
    resolves the matching ``Entry`` through the document's ``link`` field and
    saves the category produced by ``MatrixMapper.build``.  When the mapper
    yields nothing, the entry is filed under the fallback category with a
    weight of 0.

    A failure on a single document is printed together with its position and
    does not stop the run.  The index reader is closed when the run ends,
    whether it finished normally or not.
    """
    from apps.wantown import dao
    from apps.wantown.models import Entry,Category
    entries = Entry.objects.all()
    from dot.matrixmapper import MatrixMapper
    stop_words = [u'a', u'an', u'and', u'are', u'as', u'at', u'be', u'but', u'by', u'for', u'if', u'in', u'into',
                  u'is', u'it', u'no', u'not', u'of', u'on', u'or', u'such', u'that', u'the', u'their', u'then',
                  u'there', u'these', u'they', u'this', u'to', u'was', u'will', u'with',
                  u'you',u'your',u'we',u'he',u'him',u'how',u'where',
                  # project-specific additions
                  u'i',u'been',u'about',u'们',u'这',u'那',u'的',u'己',u'个',u'我',u'你',u'很',u'了',u'是',u'以',u'过',u'一',u'么',u'没',u'在']
    mapper = MatrixMapper(stop_words)
    ireader = IndexReader.open(STORE_DIR)
    try:
        # The number of Entry rows is used as the number of index positions
        # to visit; positions that cannot be processed are reported below.
        for i in range(len(entries)):
            try:
                doc = ireader.document(i)
                link = doc.get('link')
                entry = dao.get_by_link(link, Entry)
                category = mapper.build([doc])
                weight = 0
                if category:
                    cat = category[0].text
                    weight = category[0].label_weight
                else:
                    cat = '其他'
                entry.category = dao.save_category(cat,weight,'s')
                entry.save()
            except Exception as e:
                # Best effort: report the position and keep going.
                print('%s %s' % (i, e))
    finally:
        # NOTE(review): assumes IndexReader is the (Py)Lucene reader, which
        # exposes close(); previously the reader was never released.
        ireader.close()
예제 #2
0
파일: matrixmapper.py 프로젝트: fay/wt
 def run(self):
     """Store a category and an entry/category link for every label.

     For each key of ``self.all_labels`` and each label listed under it, a
     category is saved from the label's text and id (kind ``'d'``) and then
     linked to ``self.entries[key]`` for the distinct query obtained from
     ``self.query``.  An error on one label is printed and the remaining
     labels are still processed.
     """
     for key, labels in self.all_labels.items():
         for label in labels:
             try:
                 query = dao.distinct_query(self.query)
                 category = dao.save_category(label.text, label.id, 'd')
                 dao.save_entry_cat(query, self.entries[key], category, label.id)
             except Exception as e:
                 print(e)
예제 #3
0
파일: fetcher.py 프로젝트: fay/wt
def fetch_entries(feed,entries):
    """Store the not-yet-known items of a parsed feed as ``Entry`` rows.

    Args:
        feed: feed object the new ``Entry`` rows are attached to.
        entries: iterable of dict-like feed items.  The keys read are
            ``link``, ``title``, ``author``, ``summary``, ``content``,
            ``updated_parsed``, ``updated``, ``tags`` and ``categories``.

    An item is skipped (the remaining items are still processed) when:
      * an entry with the same link already exists,
      * its title is 200 characters or longer,
      * no summary can be extracted from ``summary``/``content``,
      * its tag-stripped summary is 100 characters or shorter.

    Save errors are printed and do not stop the loop.
    """
    for entry in entries:
        entry_link = entry['link']
        if dao.get_by_link(entry_link, Entry):
            # Already stored.
            continue
        entry_model = Entry(feed=feed, link=entry_link)
        entry_model.title = entry['title']
        if len(entry_model.title) >= 200:
            continue
        entry_model.author = entry.get('author', 'unknow')
        entry_model.summary = entry.get('summary', '')
        if not entry_model.summary:
            # Fall back to the content: either a plain unicode string or a
            # sequence whose first element carries the text under 'value'.
            content = entry.get('content', '')
            try:
                if isinstance(content, unicode) and content:
                    entry_model.summary = content
                else:
                    entry_model.summary = content[0].get('value', '')
            except Exception:
                # Empty or unexpectedly shaped content: nothing to store.
                continue
        #clear html tags
        entry_model.summary = strip_tags(entry_model.summary)
        if len(entry_model.summary) <= 100:
            # Too short to be useful.  Skip this item only; this used to
            # `return`, which silently dropped every remaining item.
            continue
        when = entry.get('updated_parsed', '')
        if not when:
            try:
                when = time.localtime(entry.get('updated'))
            except TypeError:
                # 'updated' is not a number of seconds (e.g. a date string
                # that could not be parsed): use the current time.
                when = time.localtime()
        # Keep year, month, day, hour and minute only.
        entry_model.when = datetime.datetime(*when[:5])
        tags = None
        if entry.has_key('tags'):
            tags = entry.get('tags', '')
            # An empty tag list must not raise; treat it as "no tags".
            tags = tags[0].get('term', '') if tags else None
        if not tags and entry.has_key('categories'):
            # NOTE(review): assumes 'categories' is a non-empty mapping;
            # confirm against the feed parser in use.
            tags = entry.get('categories')
            tags = tags.values()[0]
        if not tags:
            # The entry is still saved, just without a category.
            print('no tags.ignored...')
        else:
            entry_model.category = dao.save_category(tags)
        try:
            dao.save_model(entry_model)
        except Exception as e:
            print('save error: %s' % (e,))
예제 #4
0
파일: views.py 프로젝트: fay/wt
def post(request, step):
    """Handle the two-step posting form.

    Only POST requests are processed; anything else is redirected to the
    site root, as is a POST whose ``step`` is neither ``'1'`` nor ``'2'``.

    Step ``'1'`` re-renders ``post.html`` with the submitted ``what`` and
    ``which`` values.  Step ``'2'`` saves the post through ``dao.save``
    (with a placeholder category and, when ``request.user`` is falsy, an
    ``AnonymousUser`` built from the submitted or placeholder e-mail) and
    redirects to the view page of the saved item.
    """
    # If the user opened this URL directly (no POST), go back to the home page.
    if request.method != 'POST':
        return HttpResponseRedirect('/')

    who = request.user
    what = request.POST['what']
    which = request.POST['which']

    if step == '1':
        form_values = {'what':what, 'which':which}
        return render_to_response('post.html', {'object':form_values}, context_instance=RequestContext(request))

    if step == '2':
        available = request.POST.get('available', 'no')
        # An empty submitted category falls back to the placeholder.
        category = request.POST.get('category', '无').decode('utf-8') or '无'.decode('utf-8')
        if not who:
            email = request.POST.get('email', '*****@*****.**').decode('utf-8') or '*****@*****.**'
            who = AnonymousUser(email)
        saved = dao.save(who, what, which, dao.save_category(category), available)
        return HttpResponseRedirect('/wantown/view/id/' + saved.__str__())

    return HttpResponseRedirect('/')