def c():
    """Assign a stored category to each entry, based on its indexed document.

    For document numbers ``0 .. Entry count - 1`` the document is read from
    the index at ``STORE_DIR``, the matching ``Entry`` is looked up by the
    document's ``link`` field, and ``MatrixMapper.build`` is run on that
    single document.  The first returned item supplies the category text and
    weight (``label_weight``); when nothing is returned the entry is filed
    under '其他' ("other") with weight 0.  The category is saved through
    ``dao.save_category(..., 's')`` and assigned to the entry.

    A failure on one document is printed with its document number and does
    not stop the remaining documents.  The index reader is always closed.
    """
    from apps.wantown import dao
    from apps.wantown.models import Entry
    from dot.matrixmapper import MatrixMapper

    STOP_WORDS = [u'a', u'an', u'and', u'are', u'as', u'at', u'be', u'but',
                  u'by', u'for', u'if', u'in', u'into', u'is', u'it', u'no',
                  u'not', u'of', u'on', u'or', u'such', u'that', u'the',
                  u'their', u'then', u'there', u'these', u'they', u'this',
                  u'to', u'was', u'will', u'with',
                  u'you', u'your', u'we', u'he', u'him', u'how', u'where',
                  # Project-specific additions to the stop-word list.
                  u'i', u'been', u'about', u'们', u'这', u'那', u'的', u'己',
                  u'个', u'我', u'你', u'很', u'了', u'是', u'以', u'过', u'一',
                  u'么', u'没', u'在']
    mapper = MatrixMapper(STOP_WORDS)

    # Only the number of entries is needed, so ask the database for a count
    # instead of loading every row.
    entry_count = Entry.objects.count()

    ireader = IndexReader.open(STORE_DIR)
    try:
        # NOTE(review): this assumes index document numbers 0..N-1 line up
        # with the N stored entries -- confirm against the indexing code.
        for i in range(entry_count):
            try:
                doc = ireader.document(i)
                link = doc.get('link')
                entry = dao.get_by_link(link, Entry)
                category = mapper.build([doc])
                weight = 0
                if category:
                    cat = category[0].text
                    weight = category[0].label_weight
                else:
                    cat = '其他'
                entry.category = dao.save_category(cat, weight, 's')
                entry.save()
            except Exception as e:
                # Best effort: report the failing document and carry on.
                print('%s %s' % (i, e))
    finally:
        # Release the index files even if the loop is interrupted.
        ireader.close()
def run(self):
    """Store every label in ``self.all_labels`` as a category of its entry.

    ``self.all_labels`` is walked as ``key -> iterable of labels``; the key
    selects the entry from ``self.entries``.  For each label the query is
    resolved with ``dao.distinct_query(self.query)``, a category is saved
    from the label's ``text`` and ``id`` with the ``'d'`` flag, and the
    entry/category link is saved with ``dao.save_entry_cat``.

    An error on one label is printed and the remaining labels are still
    processed.
    """
    for entry_key, labels in self.all_labels.items():
        for label in labels:
            try:
                query = dao.distinct_query(self.query)
                category = dao.save_category(label.text, label.id, 'd')
                dao.save_entry_cat(query, self.entries[entry_key],
                                   category, label.id)
            except Exception as e:
                print(e)
def fetch_entries(feed,entries):
    """Store the not-yet-known items of a parsed feed as ``Entry`` models.

    ``entries`` is an iterable of mapping-like feed items providing at least
    ``link`` and ``title``.  An item is skipped when:

    * an entry with the same link already exists,
    * its title is 200 characters or longer,
    * no summary can be extracted from ``summary`` / ``content``,
    * the summary, after HTML tags are stripped, is 100 characters or
      shorter.

    Skipping an item never stops the processing of the remaining items.
    Items without a usable tag are still saved, just without a category.
    Save errors are printed and do not abort the loop.

    Returns ``None``.
    """
    for entry in entries:
        entry_link = entry['link']
        # Links that are already stored are left untouched.
        if dao.get_by_link(entry_link, Entry):
            continue
        entry_model = Entry(feed=feed, link=entry_link)

        entry_model.title = entry['title']
        if len(entry_model.title) >= 200:
            continue
        entry_model.author = entry.get('author', 'unknow')

        summary = entry.get('summary', '')
        if not summary:
            # Fall back to the content: either a unicode string or a
            # sequence whose first item carries the text under 'value'.
            content = entry.get('content', '')
            try:
                if isinstance(content, unicode):
                    summary = content
                else:
                    summary = content[0].get('value', '')
            except Exception:
                # Unusable content (empty, unexpected shape): skip the item.
                continue
        # Clear HTML tags.
        summary = strip_tags(summary)
        if len(summary) <= 100:
            # Too short to be useful.  Skip only this item; the original
            # ``return`` dropped every remaining item of the feed.
            continue
        entry_model.summary = summary

        when = entry.get('updated_parsed', '')
        if not when:
            try:
                when = time.localtime(entry.get('updated'))
            except (TypeError, ValueError):
                # 'updated' is not a number of seconds (e.g. a raw date
                # string): use the current time instead of crashing.
                when = time.localtime()
        entry_model.when = datetime.datetime(*when[:5])

        # ``has_key`` is kept on purpose: the item may be a mapping type
        # whose ``has_key`` is not equivalent to ``in`` -- TODO confirm.
        tags = None
        if entry.has_key('tags'):
            tag_list = entry.get('tags', '')
            if tag_list:
                tags = tag_list[0].get('term', '')
        if not tags and entry.has_key('categories'):
            categories = entry.get('categories')
            if categories:
                # NOTE(review): assumes a dict-like 'categories' value.
                tags = categories.values()[0]

        if not tags:
            # The entry is still saved below, only without a category.
            print('no tags.ignored...')
        else:
            entry_model.category = dao.save_category(tags)

        try:
            dao.save_model(entry_model)
        except Exception as e:
            print('save error: %s' % (e,))
def post(request, step):
    """Handle the two-step posting form.

    Only POST requests are processed; ``what`` and ``which`` are required
    POST fields for both steps.

    * ``step == '1'``: render ``post.html`` with the submitted ``what`` and
      ``which`` values under the ``object`` context key.
    * ``step == '2'``: save the post through ``dao.save`` together with its
      category (default '无') and ``available`` flag (default ``'no'``), then
      redirect to ``/wantown/view/id/<saved value>``.  When ``request.user``
      is falsy an ``AnonymousUser`` is built from the submitted e-mail.

    Any other request is redirected to ``/``.
    """
    # A user who enters this URL directly (no POST) goes back to the home page.
    if request.method != 'POST':
        return HttpResponseRedirect('/')

    who = request.user
    what = request.POST['what']
    which = request.POST['which']

    if step == '1':
        context = {'object': {'what': what, 'which': which}}
        return render_to_response('post.html', context,
                                  context_instance=RequestContext(request))

    if step == '2':
        available = request.POST.get('available', 'no')
        # An empty submitted category falls back to the default one.
        category = (request.POST.get('category', '无').decode('utf-8')
                    or '无'.decode('utf-8'))
        if not who:
            email = request.POST.get('email', '*****@*****.**').decode('utf-8')
            who = AnonymousUser(email or '*****@*****.**')
        saved = dao.save(who, what, which,
                         dao.save_category(category), available)
        return HttpResponseRedirect('/wantown/view/id/' + saved.__str__())

    # POST with an unknown step: same fallback as a direct visit.
    return HttpResponseRedirect('/')