Example #1
0
    def handle(self, *directories, **options):
        """Parse the optional wait deadline and refresh the tag search index.

        Options read here:
            verbose: verbosity level; a value above 0 enables the wait message.
            import_picture: read into a local.
            wait_until: optional local time formatted as
                ``'%Y-%m-%d %H:%M:%S'``; converted to a Unix timestamp.
            search_index: when truthy and ``settings.NO_SEARCH_INDEX`` is
                falsy, tags are indexed and the index is committed.

        Raises:
            Exception: whatever tag indexing or the commit raised, re-raised
                after the index has been rolled back.

        NOTE(review): ``transaction``, ``import_picture``, ``wait_until`` and
        ``index`` are not consumed in the visible part of this method;
        presumably the remainder of the command uses them - confirm.
        """
        from django.db import transaction

        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        wait_until = None
        if options.get('wait_until'):
            wait_until = time.mktime(time.strptime(options.get('wait_until'), '%Y-%m-%d %H:%M:%S'))
            if verbose > 0:
                # A parenthesised single-argument print behaves the same on
                # Python 2 and Python 3.
                print("Will wait until %s; it's %f seconds from now" % (
                    time.strftime('%Y-%m-%d %H:%M:%S',
                                  time.localtime(wait_until)),
                    wait_until - time.time()))

        index = None
        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Bare raise re-raises the active exception with its
                # original traceback intact.
                raise
Example #2
0
    def handle(self, *args, **opts):
        """Index the selected books one at a time, committing after each.

        Options read here:
            just_tags: when truthy, the book-indexing phase is skipped.
            book_id: when truthy, positional ``args`` are looked up as
                integer ids; otherwise they are looked up as slugs.

        With no positional ``args`` every ``Book`` is indexed. When indexing
        or committing a book raises, the error is printed, a rollback is
        attempted and the user is asked whether to retry; the same book is
        retried on "yes" and the loop stops on "no".
        """
        from catalogue.models import Book
        from search.index import Index
        idx = Index()

        if not opts['just_tags']:
            if args:
                books = []
                for a in args:
                    if opts['book_id']:
                        books += Book.objects.filter(id=int(a)).all()
                    else:
                        books += Book.objects.filter(slug=a).all()
            else:
                books = list(Book.objects.all())

            while books:
                try:
                    # The book is only removed from the queue after a
                    # successful commit, so a retry re-processes it.
                    b = books[0]
                    print(b.title)
                    idx.index_book(b)
                    idx.index.commit()
                    books.pop(0)
                except Exception as e:
                    print("Error occured: %s" % e)
                    try:
                        # we might not be able to rollback
                        idx.index.rollback()
                    except Exception:
                        # Best effort only: a failed rollback must not hide
                        # the retry prompt below.
                        pass
                    retry = query_yes_no("Retry?")
                    if not retry:
                        break
Example #3
0
def book_delete(sender, instance, **kwargs):
    """Clear caches and drop ``instance`` from the search index.

    Clears the cache registered under ``settings.CACHE_MIDDLEWARE_ALIAS``,
    flushes the ``/katalog/<code>.json`` SSI include for every language code
    in ``settings.LANGUAGES`` and, unless ``settings.NO_SEARCH_INDEX`` is
    set, removes ``instance`` from the search index and calls
    ``index_tags()`` on it.
    """
    caches[settings.CACHE_MIDDLEWARE_ALIAS].clear()

    language_codes = [code for (code, _name) in settings.LANGUAGES]
    catalogue_urls = ['/katalog/%s.json' % code for code in language_codes]
    flush_ssi_includes(catalogue_urls)

    if settings.NO_SEARCH_INDEX:
        return

    # remove the book from search index, when it is deleted.
    from search.index import Index
    search_index = Index()
    search_index.remove_book(instance)
    search_index.index_tags()
Example #4
0
    def setUp(self):
        """Empty the search index and build two books from fixture XML files."""
        WLTestCase.setUp(self)

        search_index = Index()
        self.search = Search()
        # uid="*" presumably matches every indexed entry - confirm against
        # the search backend's query syntax.
        match_all = self.search.index.query(uid="*")
        search_index.delete_query(match_all)
        search_index.index.commit()

        doktora_xml = get_fixture('do-doktora.xml', opds)
        self.do_doktora = Book.from_xml_file(doktora_xml)

        anusie_xml = get_fixture('fraszka-do-anusie.xml', catalogue)
        self.do_anusie = Book.from_xml_file(anusie_xml)
Example #5
0
    def handle(self, *directories, **options):
        """Refresh the tag search index when the ``search_index`` option is set.

        Options read here:
            verbose, import_picture: read into locals.
            search_index: when truthy and ``settings.NO_SEARCH_INDEX`` is
                falsy, tags are indexed and the index is committed.

        Raises:
            Exception: whatever tag indexing or the commit raised, re-raised
                after the index has been rolled back.

        NOTE(review): ``verbose`` and ``import_picture`` are not consumed in
        the visible part of this method; presumably the remainder of the
        command uses them - confirm.
        """
        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Bare raise re-raises the active exception with its
                # original traceback intact.
                raise
Example #6
0
    def handle(self, *directories, **options):
        """Rebuild and commit the tag index if ``search_index`` was requested.

        Options read here:
            verbose, import_picture: read into locals.
            search_index: when truthy and ``settings.NO_SEARCH_INDEX`` is
                falsy, tags are indexed and the index is committed.

        Raises:
            Exception: any error from indexing or committing; the index is
                rolled back before the error is re-raised.

        NOTE(review): ``verbose`` and ``import_picture`` are unused in the
        visible part of this method; presumably later code relies on them -
        confirm.
        """
        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Re-raise without naming the exception so the original
                # traceback is preserved.
                raise
Example #7
0
    def handle(self, **opts):
        """Index the selected books, then reindex tags.

        Options read here:
            just_tags: when truthy, the book phase is skipped entirely.
            args: book identifiers; when empty, all books ordered by slug.
            book_id: when truthy, ``args`` entries are integer ids,
                otherwise slugs.
            start_from / stop_after: optional slug bounds. Slugs are compared
                with hyphens removed; books below ``start_from`` are skipped
                and the loop ends at the first book above ``stop_after``.

        Each indexed book is committed on its own. On any failure the
        traceback is printed, a rollback is attempted and the user is asked
        whether to retry the same book. Tags are reindexed and committed at
        the end regardless of how the book phase finished.
        """
        from catalogue.models import Book
        from search.index import Index
        indexer = Index()

        if not opts['just_tags']:
            if opts['args']:
                queue = []
                for ident in opts['args']:
                    if opts['book_id']:
                        queue += Book.objects.filter(id=int(ident)).all()
                    else:
                        queue += Book.objects.filter(slug=ident).all()
            else:
                queue = list(Book.objects.order_by('slug'))

            lower_bound = opts.get('start_from')
            upper_bound = opts.get('stop_after')
            if lower_bound:
                lower_bound = lower_bound.replace('-', '')
            if upper_bound:
                upper_bound = upper_bound.replace('-', '')

            while queue:
                try:
                    # The head of the queue is dropped only once it has been
                    # handled, so a retry re-processes the same book.
                    book = queue[0]
                    sort_key = book.slug.replace('-', '')
                    if upper_bound and sort_key > upper_bound:
                        break
                    if not lower_bound or sort_key >= lower_bound:
                        print(book.slug)
                        indexer.index_book(book)
                        indexer.index.commit()
                    queue.pop(0)
                except:
                    # Bare except kept on purpose: it also catches
                    # non-Exception interrupts before prompting the user.
                    traceback.print_exc()
                    try:
                        # we might not be able to rollback
                        indexer.index.rollback()
                    except:
                        pass
                    if not query_yes_no("Retry?"):
                        break

        print('Reindexing tags.')
        indexer.index_tags()
        indexer.index.commit()
Example #8
0
    def setUp(self):
        """Wipe the search index and build two books from fixture XML files."""
        WLTestCase.setUp(self)

        search_index = Index()
        search_index.index.delete_all()
        search_index.index.commit()

        doktora_xml = get_fixture('do-doktora.xml')
        self.do_doktora = Book.from_xml_file(doktora_xml)

        anusie_xml = get_fixture('fraszka-do-anusie.xml', catalogue)
        self.do_anusie = Book.from_xml_file(anusie_xml)
Example #9
0
 def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
     """Add this object to the search index.

     Args:
         book_info: passed through to ``index.index_book``.
         index: index object to use; a fresh ``search.index.Index`` is
             created when None.
         index_tags: when true, ``index.index_tags()`` is called afterwards.
         commit: when true, the underlying index is committed.

     Raises:
         Exception: whatever indexing or the commit raised, re-raised after
             the underlying index has been rolled back.
     """
     if index is None:
         from search.index import Index
         index = Index()
     try:
         index.index_book(self, book_info)
         if index_tags:
             index.index_tags()
         if commit:
             index.index.commit()
     except Exception:
         index.index.rollback()
         # Bare raise re-raises the active exception with its original
         # traceback intact.
         raise
Example #10
0
import requests

from download import download_wikipedia_abstracts
from load import load_documents
from search.timing import timing
from search.index import Index


@timing
def index_documents(documents, index):
    """Feed every document into ``index`` and return the index.

    A progress line terminated by a carriage return is printed for every
    5000th document, beginning with the first one (position 0).
    """
    report_every = 5000
    for position, doc in enumerate(documents):
        index.index_document(doc)
        if not position % report_every:
            print(f'Indexed {position} documents', end='\r')
    return index


if __name__ == '__main__':
    # ``os`` is not among the imports visible at the top of this script, so
    # it is brought into scope here; importing it again is harmless if the
    # module already did.
    import os

    # this will only download the xml dump if you don't have a copy already;
    # just delete the file if you want a fresh copy
    if not os.path.exists('data/enwiki.latest-abstract.xml.gz'):
        download_wikipedia_abstracts()

    index = index_documents(load_documents(), Index())
    print(f'Index contains {len(index.documents)} documents')

    # Run the same query under each combination of AND/OR matching and
    # ranking; the return values are not used here.
    index.search('London Beer Flood', search_type='AND')
    index.search('London Beer Flood', search_type='OR')
    index.search('London Beer Flood', search_type='AND', rank=True)
    index.search('London Beer Flood', search_type='OR', rank=True)
Example #11
0
# Register CORS handling; the allowed origins come from the
# cfg["middleware"]["ALLOWED_HOSTS"] config entry.
# NOTE(review): eval() executes whatever expression the config value holds.
# If the config can be edited by untrusted parties, switch to
# ast.literal_eval - confirm the stored value is a plain literal first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=eval(cfg["middleware"]["ALLOWED_HOSTS"]),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# search interface
@app.get("/search/")
async def search(
    q: str = Query(None, max_length=280),
    page: Optional[int] = Query(
        None,
        ge=eval(cfg["search"]["pg_range"])["ge"],
        le=eval(cfg["search"]["pg_range"])["le"],
    ),
):
    """Hand the query string and page number to ``Search._query``.

    ``q`` is limited to 280 characters. The bounds for ``page`` are the
    ``ge`` and ``le`` entries of the value obtained by evaluating
    cfg["search"]["pg_range"]. Returns whatever ``Search()._query(q, page)``
    returns.
    """
    # NOTE(review): the bounds above are produced with eval() on a config
    # value; make sure that config is trusted.
    engine = Search()
    return engine._query(q, page)


# Re-queries and populates database at scheduled time
# Use cron expression to set refresh rate
@aiocron.crontab(cfg["CRAWLER"]["refresh_rate"])
async def background_process():
    """Instantiate a ``Crawler`` and then call ``Index()._create()``.

    The schedule is the cron expression stored under
    cfg["CRAWLER"]["refresh_rate"].
    """
    crawler = Crawler()
    created_index = Index()._create()
Example #12
0
 def update_index(sender, instance, **kwargs):
     """Call ``Index.index_tags`` for ``instance``.

     ``remove_only`` is True when no ``created`` keyword was supplied and
     False when one was.
     """
     from search.index import Index
     tag_index = Index()
     created_missing = 'created' not in kwargs
     tag_index.index_tags(instance, remove_only=created_missing)
Example #13
0
    def handle(self, **options):
        """Import every ``.xml`` file found in the given directories.

        Options read here:
            verbose: verbosity level; controls per-file and retry messages.
            import_picture: when truthy files go through
                ``self.import_picture``, otherwise ``self.import_book``.
            search_index: when truthy and ``settings.NO_SEARCH_INDEX`` is
                falsy, tags are indexed and committed before importing.
            directory: iterable of directory paths to scan; entries that are
                not directories are reported and skipped.

        Files are processed in sorted order. A file that already exists is
        counted as skipped. A file whose import raises ``Book.DoesNotExist``
        is pushed to the end of the queue, provided at least one other file
        has been imported since it was last postponed; otherwise the
        exception propagates.

        Raises:
            Book.DoesNotExist: when a postponed file fails again with no
                import progress in between.
            Exception: any error from the initial tag indexing, re-raised
                after the index has been rolled back.
        """
        self.style = color_style()

        verbose = options.get('verbose')
        import_picture = options.get('import_picture')

        if options.get('search_index') and not settings.NO_SEARCH_INDEX:
            index = Index()
            try:
                index.index_tags()
                index.index.commit()
            except Exception:
                index.index.rollback()
                # Bare raise keeps the original traceback untouched.
                raise

        files_imported = 0
        files_skipped = 0

        for dir_name in options['directory']:
            if not os.path.isdir(dir_name):
                print(self.style.ERROR("%s: Not a directory. Skipping." % dir_name))
                continue

            # files queue
            files = sorted(os.listdir(dir_name))
            # file name -> value of files_imported when it was last postponed
            postponed = {}
            while files:
                file_name = files.pop(0)
                file_path = os.path.join(dir_name, file_name)

                # Skip files that are not XML files
                if os.path.splitext(file_path)[1] != '.xml':
                    continue

                if verbose > 0:
                    print("Parsing '%s'" % file_path)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                # Import book files
                try:
                    if import_picture:
                        self.import_picture(file_path, options)
                    else:
                        self.import_book(file_path, options)

                    files_imported += 1

                except (Book.AlreadyExists, Picture.AlreadyExists):
                    print(self.style.ERROR(
                        '%s: Book or Picture already imported. Skipping. To overwrite use --force.' %
                        file_path))
                    files_skipped += 1

                except Book.DoesNotExist:
                    if file_name not in postponed or postponed[file_name] < files_imported:
                        # push it back into the queue, maybe the missing child will show up
                        if verbose:
                            print(self.style.NOTICE('Waiting for missing children'))
                        files.append(file_name)
                        postponed[file_name] = files_imported
                    else:
                        # we're in a loop, nothing's being imported - some child is really missing
                        raise

        # Print results
        print()
        print("Results: %d files imported, %d skipped, %d total." % (
            files_imported, files_skipped, files_imported + files_skipped))
        print()
Example #14
0
 def update_index(sender, instance, **kwargs):
     """Invoke ``Index.index_tags`` on ``instance``.

     The ``remove_only`` flag is set exactly when ``kwargs`` carries no
     ``created`` key.
     """
     from search.index import Index
     search_index = Index()
     no_created_flag = 'created' not in kwargs
     search_index.index_tags(instance, remove_only=no_created_flag)