def dump(): for f in config.CACHE.files(): d = Document(f) m = d.parse_notes() if not m['author']: # skip skid marks with out annotated authors. continue author = ' '.join(map(lastname, m['author'])) title = remove_stopwords(m['title']) title = re.findall('\w+', title) year = m['year'][-2:] title = ' '.join(title) author = author.replace('-', ' ') title = title.replace('-', ' ') year = year.replace('-', ' ') key = '%s-%s-%s' % (author, year, title) key = key.lower() print key.encode('utf8')
def search(q, limit=None):
    """Yield Whoosh hits for the UTF-8 byte-string query ``q``.

    ``limit`` caps the number of results (``None`` means unlimited).

    NOTE(review): a second ``search`` defined later in this module
    shadows this one at import time; only the later one is reachable.
    """
    query_text = unicode(q.decode('utf8'))
    ix = open_dir(DIRECTORY, NAME)
    with ix.searcher() as searcher:
        fields = ['title', 'author', 'tags', 'notes', 'text', 'source',
                  'year']
        boosts = {'title': 7, 'year': 6, 'author': 10,
                  'tags': 4, 'notes': 2, 'text': 1}
        parser = MultifieldParser(fieldnames=fields,
                                  fieldboosts=boosts,
                                  schema=ix.schema)
        # Whoosh chokes on queries containing stop words; strip them first.
        query_text = remove_stopwords(query_text)
        parsed = parser.parse(query_text)
        for hit in searcher.search(parsed, limit=limit):
            yield hit
def search(q, limit=None):
    """Yield Whoosh hits for the text query ``q`` (no decoding applied).

    ``limit`` caps the number of results (``None`` means unlimited).

    NOTE(review): this re-definition shadows an earlier ``search`` in
    this module (which additionally UTF-8-decodes its argument); decide
    which one should survive and delete the other.
    """
    ix = open_dir(DIRECTORY, NAME)
    with ix.searcher() as searcher:
        parser = MultifieldParser(
            fieldnames=['title', 'author', 'tags', 'notes', 'text',
                        'source', 'year'],
            fieldboosts={'title': 7, 'year': 6, 'author': 10,
                         'tags': 4, 'notes': 2, 'text': 1},
            schema=ix.schema,
        )
        # Strip stop words before parsing -- Whoosh chokes on them.
        parsed = parser.parse(remove_stopwords(q))
        results = searcher.search(parsed, limit=limit)
        for hit in results:
            yield hit