def edit(req, id, docname):
    """Render the 'ekb/edit.html' page for one document.

    Args:
        req: The incoming request; not read by this function.
        id: Document id in string form; converted to an int with ``atoi``.
        docname: Accepted for the caller's benefit; not used here.

    Returns:
        Whatever ``render_to_response`` produces for ``ekb/edit.html``.

    Raises:
        Http404: If ``Doc.try_get`` returns nothing for the given id.
    """
    docid = atoi(id)
    doc = Doc.try_get(id=docid)
    if not doc:
        raise Http404("Document #%d (%s) does not exist." % (docid, id))
    # NOTE(review): presumably switches the doc to its newest revision -- confirm.
    doc.use_latest()
    page = doc.get_edit_url()
    alltags = _alltags()

    # Menu entries: the KB root, the document's first tag (if it has any),
    # the document itself, and finally the edit page.
    menuitems = [('/kb/', 'Knowledgebase')]
    if len(doc.tags) > 0:
        first_tag = doc.tags[0]
        menuitems.append(('/kb/%s' % first_tag, first_tag))
    menuitems.append((doc.get_url(), 'KB%d' % doc.id))
    menuitems.append((doc.get_edit_url(), '-Edit-'))

    context = {
        'alltags': alltags,
        'alldocs': Doc,
        'menuitems': menuitems,
        'page': page,
        'title': doc.title,
        'tags': join(', ', doc.tags),
        'uploadurl': doc.get_upload_url(),
        'text': doc.text,
    }
    return render_to_response('ekb/edit.html', context)
def _calc_word_frequencies():
    """Rebuild the WordWeights and Words tables from all documents.

    Both tables are emptied first.  For every document returned by
    ``Doc.search()`` the title (twice), filename, expanded text and tags are
    joined into one string, split into lower-cased words, and one WordWeights
    row is inserted per distinct word with weight
    ``sqrt(occurrences / total words in that document)``.  Afterwards one
    Words row is inserted per distinct word with its occurrence count summed
    over all documents.  Progress is reported via ``print`` and ``echo``.
    """
    print('Deleting all wordweights')
    db.run('delete from WordWeights')
    db.run('delete from Words')

    # A word is a run of word characters, optionally followed by one of
    # . ' # % @ and another run (e.g. "don't", "a.b").
    word_re = re.compile(r"(\w+(?:[.'#%@]\w+)?)")

    totals = {}  # word -> occurrences summed over every document
    for doc in Doc.search():
        print(' %s' % doc.filename)
        textbits = [
            doc.title, doc.title,  # title gets bonus points
            doc.filename,
            doc.expanded_text(lambda x: x, headerdepth=1, expandbooks=1),
        ]
        textbits.extend(doc.tags)
        fulltext = join(' ', textbits)
        words = [found.lower() for found in word_re.findall(fulltext)]
        # Kept as a float so the weight division below is a true division.
        total = float(len(words))
        echo(' %d total words' % total)

        wordcounts = {}  # word -> occurrences within this document
        for w in words:
            if w in wordcounts:
                wordcounts[w] += 1
            else:
                wordcounts[w] = 1
        echo(', %d unique' % len(wordcounts))

        new = 0  # words not seen in any earlier document
        for w, count in wordcounts.iteritems():
            if w not in totals:
                new += 1
            totals[w] = totals.get(w, 0) + count
            weight = (count / total) ** .5
            db.run('insert into WordWeights (docid, word, weight) '
                   ' values (?,?,?)',
                   doc.id, w, weight)
        echo(', %d new\n' % new)

    print(' %d total unique words' % len(totals))
    print('Saving words')
    for word, count in totals.iteritems():
        db.run('insert into Words (word, total) values (?,?)', word, count)
def __init__(self, want_words, highlight_tag):
    """Prepare the regexes used for highlighting the wanted words.

    Args:
        want_words: Iterable of literal words; each is regex-escaped.
        highlight_tag: Tag name for the highlight markup.  Every non-word
            character in it is replaced by an underscore.

    After construction, ``wordsub``, ``wordfix`` and ``replacement`` are all
    ``None`` unless both the joined word pattern and ``highlight_tag`` are
    non-empty.  In that case:

    * ``wordsub`` matches any wanted word, case-insensitively, as group 1;
    * ``wordfix`` matches ``<tag>...</tag>`` non-greedily, body as group 1;
    * ``replacement`` is the template ``<tag>\\1</tag>``.
    """
    self.wordsub = self.wordfix = self.replacement = None

    escaped_words = [re.escape(word) for word in want_words]
    want_esc = join("|", escaped_words)
    if not (want_esc and highlight_tag):
        # Nothing to highlight, or no tag to highlight with.
        return

    self.wordsub = re.compile('(' + want_esc + ')', re.I)
    htag = re.sub(r'\W', '_', highlight_tag)
    tag_pair = (htag, htag)
    self.wordfix = re.compile('<%s>(.*?)</%s>' % tag_pair)
    self.replacement = u'<%s>\\1</%s>' % tag_pair