Python GutenbergCache.get_cache 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: gutenbergpy.gutenbergcache

클래스/타입: GutenbergCache

메소드/함수: get_cache

hotexamples.com에서의 예제들: 4

Python GutenbergCache.get_cache - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 gutenbergpy.gutenbergcache.GutenbergCache.get_cache에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

create(4)

get_cache(4)

exists(2)

자주 사용되는 메소드들

create (4)

get_cache (4)

exists (2)

예제 #1

파일 보기

    def search(self):
        """Search the Gutenberg cache for titles matching ``self.titleQuery``.

        When the query is non-empty, the ``titles`` table is searched with a
        ``LIKE '%query%'`` pattern (both sides wrapped in ``upper``), limited
        to ``self.nbr_results`` rows. The rows are stored in
        ``self.searchResults``, the number of hits is reported in
        ``self.infoBox`` and the second column of every row is appended to
        ``self.titleLabels``. When the query is empty, a warning is shown
        instead and nothing else is touched.
        """
        query_string = self.titleQuery

        if not query_string:
            self.infoBox.setText("You didn't search anything", "warning")
            return

        # The query text is spliced into a single-quoted SQL literal, so any
        # single quote it contains (e.g. "Alice's") must be doubled, otherwise
        # the statement is malformed or can be altered by the input.
        escaped_query = str(query_string).replace("'", "''")

        # parse query and lookup in gutenbergcache
        cache = GutenbergCache.get_cache()
        query_results = cache.native_query(
            sql_query=
            "select * from titles where upper(name) like upper('%{query}%') limit {limit}"
            .format(query=escaped_query, limit=self.nbr_results))
        # get the results
        self.searchResults = list(query_results)

        # display info message; only a count of exactly one is singular
        n_results = len(self.searchResults)
        is_single = n_results == 1
        self.infoBox.setText("{n} result{s} {verb} been found".format(
            n=n_results,
            s="" if is_single else "s",
            verb="has" if is_single else "have"))

        # Update the results list with the search results
        # in order to display them
        for row in self.searchResults:
            self.titleLabels.append(str(row[1]))

        # The widget state only changes when there is at least one result
        # (same as before, when these calls sat inside the loop); doing it
        # once is enough.
        if self.searchResults:
            # NOTE(review): the self-assignment is kept from the original;
            # presumably it notifies the GUI that the list changed - confirm.
            self.titleLabels = self.titleLabels
            self.clearButton.setDisabled(False)
            self.addButton.setDisabled(self.selectedTitles == list())

            self.controlArea.setDisabled(False)

예제 #2

파일 보기

    def sendData(self):
        """Download the books in ``self.myBasket`` and send them to output.

        For every basket entry the Gutenberg book id is looked up in the
        cache (using the entry's third item as the ``books.id``), the text is
        fetched and stripped of its headers, and one ``Input`` is created per
        text. The resulting segmentation (a single input, or the
        concatenation of all inputs) is annotated with a ``"title"`` key
        taken from the entry's second item and sent on the
        ``"Gutenberg importation"`` channel.

        If the basket is empty a warning is shown and nothing is sent. If
        anything fails while querying the cache or downloading, an error is
        shown, the progress bar is closed, the control area is re-enabled and
        nothing is sent.
        """
        # Skip if title list is empty:
        if not self.myBasket:
            self.infoBox.setText(
                "Your corpus is empty, please add some books first", "warning")
            return

        # Clear created Inputs.
        self.clearCreatedInputs()

        self.controlArea.setDisabled(True)

        # Initialize progress bar.
        progressBar = ProgressBar(
            self,
            iterations=len(self.myBasket),
        )

        text_content = list()
        annotations = list()
        try:
            # Opening the cache is inside the try block as well: if it fails,
            # the widget must not be left with its control area disabled.
            cache = GutenbergCache.get_cache()

            for text in self.myBasket:

                # Get the id of the text
                query_id = cache.native_query(
                    sql_query=
                    "select gutenbergbookid from books where id == {selected_id}"
                    .format(selected_id=text[2]))
                gutenberg_id = list(query_id)

                # Get the text with Gutenbergpy
                # NOTE(review): what type get_text_by_id/strip_headers return
                # (str or bytes) is not visible here - confirm Input accepts it.
                gutenberg_text = gutenbergpy.textget.strip_headers(
                    gutenbergpy.textget.get_text_by_id(gutenberg_id[0][0]))
                text_content.append(gutenberg_text)

                annotations.append(text[1])
                progressBar.advance()

        # If an error occurs (e.g. http error, or memory error)...
        except Exception:
            # Set Info box and widget to "error" state.
            self.infoBox.setText("Couldn't download data from Gutenberg",
                                 "error")
            # Close the progress bar too, so it is not left open on failure.
            progressBar.finish()
            self.controlArea.setDisabled(False)
            return

        # Store downloaded text strings in input objects...
        for text in text_content:
            newInput = Input(text, self.captionTitle)
            self.createdInputs.append(newInput)

        # If there's only one text, the widget's output is the created Input.
        if len(self.createdInputs) == 1:
            self.segmentation = self.createdInputs[0]

        # Otherwise the widget's output is a concatenation...
        else:
            self.segmentation = Segmenter.concatenate(
                self.createdInputs,
                self.captionTitle,
                import_labels_as=None,
            )

        # Annotate segments with the title stored for the matching basket
        # entry (relies on one segment per downloaded text, in basket order).
        for idx, segment in enumerate(self.segmentation):
            segment.annotations.update({"title": annotations[idx]})
            self.segmentation[idx] = segment

        # Clear progress bar.
        progressBar.finish()

        self.controlArea.setDisabled(False)

        # Set status to OK and report data size...
        message = "%i segment@p sent to output " % len(self.segmentation)
        message = pluralize(message, len(self.segmentation))
        numChars = 0
        for segment in self.segmentation:
            segmentLength = len(Segmentation.get_data(segment.str_index))
            numChars += segmentLength
        message += "(%i character@p)." % numChars
        message = pluralize(message, numChars)
        self.infoBox.setText(message)

        self.send("Gutenberg importation", self.segmentation, self)
        self.sendButton.resetSettingsChangedFlag()

예제 #3

파일 보기

import gutenbergpy.textget
from gutenbergpy.gutenbergcache import GutenbergCache

# Build the cache from scratch, with every build step switched on.
build_steps = dict(
    refresh=True,
    download=True,
    unpack=True,
    parse=True,
    cache=True,
    deleteTemp=True,
)
GutenbergCache.create(**build_steps)

# Fetch the default cache (SQLite).
cache = GutenbergCache.get_cache()

# For the query function you can use the following fields:
# languages authors types titles subjects publishers bookshelves
# (this example filters with the ``downloadtype`` keyword).
wanted_types = ['application/plain', 'text/plain', 'text/html; charset=utf-8']
matches = cache.query(downloadtype=wanted_types)
print(matches)

# Download book 1000 and print its text with the headers stripped.
raw_text = gutenbergpy.textget.get_text_by_id(1000)
print(gutenbergpy.textget.strip_headers(raw_text))

예제 #4

파일 보기

    def search(self):
        """Search the Gutenberg cache by title, author and language.

        Reads ``self.titleQuery``, ``self.authorQuery`` and
        ``self.langQuery`` (translated through ``self.lang_dict``). If the
        language is ``'Any'`` and both text queries are empty, a warning is
        shown and no search is run. Otherwise the cache is queried with
        ``LIKE`` patterns, limited to ``self.nbr_results`` rows.

        Every returned row becomes a list
        ``[title, author, gutenberg id, language]`` in
        ``self.searchResults``: newlines in the title are replaced by
        ``", "``, the author is rewritten from "name, first_name" to
        "first_name name", and the language value is mapped back to its
        ``lang_dict`` key (left unchanged when no key has that value). The
        hits are then listed in ``self.titleLabels``.

        If the cache cannot be opened or queried, the exception is printed,
        an error is shown in ``self.infoBox`` and the method returns.
        """
        query_string = self.titleQuery
        query_author = self.authorQuery
        language = self.lang_dict[self.langQuery]

        # informs the user that he didn't change anything
        if self.langQuery == 'Any' and query_string == '' and self.authorQuery == '':
            self.infoBox.setText(
                "You can't search only by language, if it's set to Any",
                "warning")
            return

        # Recode a two-word author "first_name name" to "name%, first_name"
        # so it matches the "name, first_name" form stored in the cache.
        if len(query_author.split()) == 2:
            if "," not in query_author:
                query_author = "%, ".join(query_author.split()[::-1])

        def quote(value):
            # The values are spliced into single-quoted SQL literals, so
            # embedded single quotes must be doubled to keep the statement
            # well-formed (e.g. a title containing "Alice's").
            return str(value).replace("'", "''")

        # searches the database; opening the cache and fetching the rows are
        # inside the try block so that any cache failure is reported the
        # same way.
        try:
            cache = GutenbergCache.get_cache()
            query_results = cache.native_query(sql_query="""
                /* Creates a new table with one author per book
                by selecting the greatest author id */

                WITH unique_book_author AS
                (SELECT * FROM book_authors
                WHERE authorid IN (SELECT MAX(authorid) FROM book_authors GROUP BY bookid))

                /* Selects title, author, gutenberg id and language */

                SELECT titles.name, authors.name, books.gutenbergbookid, languages.name
                FROM titles

                /* Merges every needed table into one on shared attributes */

                INNER JOIN books ON books.id = titles.bookid
                INNER JOIN unique_book_author ON books.id = unique_book_author.bookid
                INNER JOIN authors ON authors.id = unique_book_author.authorid
                INNER JOIN languages ON books.languageid = languages.id

                /* Matches users query using % wildcard for more permissive query */

                WHERE upper(titles.name) LIKE upper('%{title}%')
                AND upper(authors.name) LIKE upper('%{author}%')
                AND languages.name LIKE '%{lang}%'
                LIMIT {limit}
                """.format(title=quote(query_string),
                           author=quote(query_author),
                           lang=quote(language),
                           limit=self.nbr_results))
            # get the results
            rows = list(query_results)
        except Exception as exc:
            print(exc)
            self.infoBox.setText(
                "An error occurred while interrogating the cache.",
                "error")
            return

        # Reverse view of lang_dict (value -> first key having that value),
        # built once instead of scanning the dict for every row.
        lang_names = {}
        for key, value in self.lang_dict.items():
            lang_names.setdefault(value, key)

        self.searchResults = list()

        # creates better results
        for row in rows:
            result = list(row)
            # replaces all newlines types
            result[0] = re.sub(r'[\n\r]+', r', ', result[0])
            # recodes author from: name, first_name to: first_name name
            result[1] = " ".join(result[1].split(", ")[::-1])
            # gets the key from the lang_dict for the corresponding language
            # abbreviation; an abbreviation missing from lang_dict is kept
            # as is rather than raising IndexError
            result[3] = lang_names.get(result[3], result[3])

            self.searchResults.append(result)

        # display info message; only a count of exactly one is singular
        n_results = len(self.searchResults)
        is_single = n_results == 1
        self.infoBox.setText("{n} result{s} {verb} been found".format(
            n=n_results,
            s="" if is_single else "s",
            verb="has" if is_single else "have"))

        self.clearResults()
        # Update the results list with the search results
        # in order to display them
        for result in self.searchResults:
            result_string = "{title} — {author} — {lang}".format(
                title=result[0], author=result[1], lang=result[3])
            self.titleLabels.append(result_string)

        # The widget state only changes when there is at least one result
        # (same as before, when these calls sat inside the loop); doing it
        # once is enough.
        if self.searchResults:
            # NOTE(review): the self-assignment is kept from the original;
            # presumably it notifies the GUI that the list changed - confirm.
            self.titleLabels = self.titleLabels
            self.clearButton.setDisabled(False)
            self.addButton.setDisabled(self.selectedTitles == list())

            self.controlArea.setDisabled(False)