예제 #1
0
    def handle(self, *args, **opts):
        """Reindex books in the search index, offering a retry on failure.

        Unless ``opts['just_tags']`` is truthy, each positional argument
        selects books to index: arguments are matched against ``id`` (after
        ``int()`` conversion) when ``opts['book_id']`` is truthy, and against
        ``slug`` otherwise.  With no positional arguments every book is
        indexed.

        Each book is indexed and committed on its own.  If that raises, the
        error is printed, a rollback is attempted, and the operator is asked
        whether to retry the same book; answering no stops processing the
        remaining books.
        """
        from catalogue.models import Book
        from search.index import Index
        idx = Index()

        if not opts['just_tags']:
            if args:
                by_id = opts['book_id']
                books = []
                for a in args:
                    if by_id:
                        books += Book.objects.filter(id=int(a)).all()
                    else:
                        books += Book.objects.filter(slug=a).all()
            else:
                books = list(Book.objects.all())

            while books:
                try:
                    b = books[0]
                    # Parenthesised single-argument print behaves the same
                    # under Python 2 and Python 3.
                    print(b.title)
                    idx.index_book(b)
                    idx.index.commit()
                    # Dequeue only after a successful commit, so a retry
                    # processes the same book again.
                    books.pop(0)
                except Exception as e:
                    print("Error occured: %s" % e)
                    try:
                        # we might not be able to rollback
                        idx.index.rollback()
                    except Exception:
                        # Best effort only; unlike a bare ``except:`` this
                        # still lets KeyboardInterrupt/SystemExit through.
                        pass
                    retry = query_yes_no("Retry?")
                    if not retry:
                        break
예제 #2
0
    def setUp(self):
        """Prepare the search index and load the two fixture books.

        Runs the base ``WLTestCase`` set-up, calls ``delete_all()`` and
        ``commit()`` on the index backend, then builds ``self.do_doktora``
        and ``self.do_anusie`` from their XML fixtures.
        """
        WLTestCase.setUp(self)

        # Presumably leaves the index empty so tests do not see entries from
        # earlier runs -- confirm against Index.
        search_idx = Index()
        search_idx.index.delete_all()
        search_idx.index.commit()

        doktora_xml = get_fixture('do-doktora.xml')
        self.do_doktora = Book.from_xml_file(doktora_xml)

        anusie_xml = get_fixture('fraszka-do-anusie.xml', catalogue)
        self.do_anusie = Book.from_xml_file(anusie_xml)
예제 #3
0
 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
     """Add this object to the search index.

     Args:
         book_info: Passed through unchanged to ``index.index_book``.
         index: Index object to use; a new ``search.index.Index`` is
             created when omitted.
         index_tags: When true, also call ``index.index_tags()``.
         commit: When true, commit the index after indexing.

     Raises:
         Exception: Whatever indexing or committing raised.  The index is
             rolled back first and the exception is re-raised with its
             original traceback.
     """
     if index is None:
         from search.index import Index
         index = Index()
     try:
         index.index_book(self, book_info)
         if index_tags:
             index.index_tags()
         if commit:
             index.index.commit()
     except Exception:
         index.index.rollback()
         # A bare ``raise`` keeps the original traceback; ``raise e`` would
         # restart it from this line on Python 2.
         raise
예제 #4
0
    def handle(self, *directories, **options):
        """Entry point of the command; the visible part reindexes tags.

        Sets ``self.style`` and reads the ``verbose`` and ``import_picture``
        options.  When the ``search_index`` option is truthy and
        ``settings.NO_SEARCH_INDEX`` is falsy, tags are indexed and the index
        is committed; on any error the index is rolled back and the error is
        re-raised with its original traceback.
        """
        self.style = color_style()

        # NOTE(review): neither local is used in the lines visible here;
        # kept because the rest of the method may rely on them -- confirm.
        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Bare ``raise`` preserves the original traceback.
                raise
예제 #5
0
import os

import requests

from download import download_wikipedia_abstracts
from load import load_documents
from search.index import Index
from search.timing import timing


@timing
def index_documents(documents, index):
    """Feed every item of ``documents`` to ``index`` and return ``index``.

    A progress line is printed for item 0 and for every 5000th item after
    it.  The line ends with a carriage return, so successive progress
    messages overwrite each other on a terminal.
    """
    for i, doc in enumerate(documents):
        index.index_document(doc)
        if i % 5000:
            # Not on a reporting boundary.
            continue
        print(f'Indexed {i} documents', end='\r')
    return index


if __name__ == '__main__':
    # Download the XML dump only when no local copy exists; delete the file
    # to force a fresh download.  Relies on ``os`` being imported at module
    # level.
    if not os.path.exists('data/enwiki.latest-abstract.xml.gz'):
        download_wikipedia_abstracts()

    index = index_documents(load_documents(), Index())
    print(f'Index contains {len(index.documents)} documents')

    # Run the same query in each mode: AND / OR, first without and then with
    # ranking.  ``rank`` is omitted (not passed as False) in the first two
    # calls so that ``Index.search`` applies its own default.
    for search_options in (
        {'search_type': 'AND'},
        {'search_type': 'OR'},
        {'search_type': 'AND', 'rank': True},
        {'search_type': 'OR', 'rank': True},
    ):
        index.search('London Beer Flood', **search_options)
예제 #6
0
# Register CORS handling; the allowed origins come from the "middleware"
# section of the configuration.
# NOTE(review): eval() executes whatever expression the config value holds.
# If the config file is not fully trusted, parse it with ast.literal_eval
# (or store a plain delimited list) instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=eval(cfg["middleware"]["ALLOWED_HOSTS"]),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Search endpoint: GET /search/?q=<text>&page=<n>
#   q    -- query text, at most 280 characters; defaults to None.
#   page -- optional page number; its bounds are the "ge"/"le" entries of
#           cfg["search"]["pg_range"].
# NOTE(review): eval() on configuration values executes arbitrary
# expressions; confirm the config source is trusted.
@app.get("/search/")
async def search(
    q: str = Query(None, max_length=280),
    page: Optional[int] = Query(
        None,
        ge=eval(cfg["search"]["pg_range"])["ge"],
        le=eval(cfg["search"]["pg_range"])["le"],
    ),
):
    # Delegate to a fresh Search object and hand its result back unchanged.
    engine = Search()
    return engine._query(q, page)


# Scheduled job: re-queries and repopulates the database.
# The refresh rate is the cron expression in cfg["CRAWLER"]["refresh_rate"].
@aiocron.crontab(cfg["CRAWLER"]["refresh_rate"])
async def background_process():
    """Instantiate a ``Crawler`` and then call ``Index()._create()``.

    Both results are bound to locals that are not used afterwards.
    """
    # Presumably constructing Crawler is what starts the crawl -- confirm.
    crawler = Crawler()
    rebuilt_index = Index()._create()
예제 #7
0
 def update_index(sender, instance, **kwargs):
     """Re-index the tags for ``instance``.

     ``remove_only`` is true exactly when no ``created`` keyword was
     supplied.  Presumably this is a signal receiver where only save
     signals carry ``created`` -- confirm against where it is connected.
     """
     from search.index import Index
     created_missing = 'created' not in kwargs
     Index().index_tags(instance, remove_only=created_missing)