def handle(self, *args, **opts):
    from catalogue.models import Book
    from search.index import Index

    idx = Index()
    if not opts['just_tags']:
        # Collect the books to index: by id, by slug, or everything.
        if args:
            books = []
            for a in args:
                if opts['book_id']:
                    books += Book.objects.filter(id=int(a)).all()
                else:
                    books += Book.objects.filter(slug=a).all()
        else:
            books = list(Book.objects.all())

        # Index one book at a time, committing after each one so that a
        # failure can be retried without redoing earlier work.
        while books:
            try:
                b = books[0]
                print b.title
                idx.index_book(b)
                idx.index.commit()
                books.pop(0)
            except Exception, e:
                print "Error occurred: %s" % e
                try:
                    # we might not be able to rollback
                    idx.index.rollback()
                except:
                    pass
                retry = query_yes_no("Retry?")
                if not retry:
                    break
def setUp(self):
    WLTestCase.setUp(self)
    index = Index()
    index.index.delete_all()
    index.index.commit()
    self.do_doktora = Book.from_xml_file(get_fixture('do-doktora.xml'))
    self.do_anusie = Book.from_xml_file(
        get_fixture('fraszka-do-anusie.xml', catalogue))
def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
    if index is None:
        from search.index import Index
        index = Index()
    try:
        index.index_book(self, book_info)
        if index_tags:
            index.index_tags()
        if commit:
            index.index.commit()
    except Exception, e:
        index.index.rollback()
        raise e
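# Hedged usage sketch (not from the original source): reindex a single book
# with an externally managed Index so several books can share one commit.
# The slug is assumed to match the 'do-doktora.xml' fixture used above; the
# surrounding Django setup is also an assumption.
from catalogue.models import Book
from search.index import Index

index = Index()
book = Book.objects.get(slug='do-doktora')
book.search_index(index=index, index_tags=False, commit=False)
index.index.commit()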
def handle(self, *directories, **options):
    self.style = color_style()
    verbose = options.get('verbose')
    import_picture = options.get('import_picture')

    if options.get('search_index') and not settings.NO_SEARCH_INDEX:
        index = Index()
        try:
            index.index_tags()
            index.index.commit()
        except Exception, e:
            index.index.rollback()
            raise e
import os

import requests

from download import download_wikipedia_abstracts
from load import load_documents
from search.timing import timing
from search.index import Index


@timing
def index_documents(documents, index):
    for i, document in enumerate(documents):
        index.index_document(document)
        if i % 5000 == 0:
            print(f'Indexed {i} documents', end='\r')
    return index


if __name__ == '__main__':
    # this will only download the xml dump if you don't have a copy already;
    # just delete the file if you want a fresh copy
    if not os.path.exists('data/enwiki.latest-abstract.xml.gz'):
        download_wikipedia_abstracts()

    index = index_documents(load_documents(), Index())
    print(f'Index contains {len(index.documents)} documents')

    index.search('London Beer Flood', search_type='AND')
    index.search('London Beer Flood', search_type='OR')
    index.search('London Beer Flood', search_type='AND', rank=True)
    index.search('London Beer Flood', search_type='OR', rank=True)
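# A minimal, self-contained sketch of an inverted index exposing the interface
# the script above relies on (index_document, a documents mapping, and
# search(query, search_type=..., rank=...)). The real search.index.Index may
# differ; the document attributes used here (ID, fulltext) are assumptions.
from collections import Counter


class SketchIndex:
    def __init__(self):
        self.documents = {}   # document ID -> document object
        self.index = {}       # token -> set of document IDs containing it

    def index_document(self, document):
        self.documents[document.ID] = document
        for token in document.fulltext.lower().split():
            self.index.setdefault(token, set()).add(document.ID)

    def search(self, query, search_type='AND', rank=False):
        terms = query.lower().split()
        sets = [self.index.get(t, set()) for t in terms]
        if not sets:
            return []
        ids = set.intersection(*sets) if search_type == 'AND' else set.union(*sets)
        results = [self.documents[i] for i in ids]
        if rank:
            # Naive ranking: sort by total term frequency in the document text.
            results.sort(
                key=lambda d: sum(Counter(d.fulltext.lower().split())[t] for t in terms),
                reverse=True,
            )
        return results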
app.add_middleware(
    CORSMiddleware,
    allow_origins=eval(cfg["middleware"]["ALLOWED_HOSTS"]),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# search interface
@app.get("/search/")
async def search(
    q: str = Query(None, max_length=280),
    page: Optional[int] = Query(
        None,
        ge=eval(cfg["search"]["pg_range"])["ge"],
        le=eval(cfg["search"]["pg_range"])["le"],
    ),
):
    return Search()._query(q, page)


# Re-queries and repopulates the database at the scheduled time.
# Use a cron expression to set the refresh rate.
@aiocron.crontab(cfg["CRAWLER"]["refresh_rate"])
async def background_process():
    start_crawl = Crawler()
    index = Index()._create()
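# Hedged sketch of the configuration the endpoint above eval()s. The section
# and key names mirror the lookups in the code, but the literal values and the
# use of configparser are assumptions. With configparser every value is a
# string, which is why the code calls eval() to turn them back into Python
# objects.
import configparser

cfg = configparser.ConfigParser()
cfg.read_string("""
[middleware]
ALLOWED_HOSTS = ["http://localhost:3000", "https://example.org"]

[search]
pg_range = {"ge": 1, "le": 10}

[CRAWLER]
refresh_rate = 0 */6 * * *
""")

assert eval(cfg["search"]["pg_range"])["le"] == 10
assert eval(cfg["middleware"]["ALLOWED_HOSTS"])[0] == "http://localhost:3000"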
def update_index(sender, instance, **kwargs):
    from search.index import Index
    idx = Index()
    idx.index_tags(instance, remove_only='created' not in kwargs)
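# Hedged sketch (not from the original source) of how update_index could be
# wired to Django signals for a Tag model. post_save passes a 'created'
# kwarg, so saved tags are reindexed; pre_delete does not, so deleted tags
# are only removed from the index, matching the remove_only logic above.
from django.db.models.signals import post_save, pre_delete
from catalogue.models import Tag

post_save.connect(update_index, sender=Tag)
pre_delete.connect(update_index, sender=Tag)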