Example #1
import gzip
import os
import zlib

from unidecode import unidecode

# QuestionDatabase and CachedWikipedia are assumed to be imported from
# elsewhere in the surrounding project.


def text_iterator(use_wiki, wiki_location,
                  use_qb, qb_location,
                  use_source, source_location,
                  limit=-1,
                  min_pages=0, country_list='data/country_list.txt'):
    qdb = QuestionDatabase(qb_location)
    doc_num = 0

    cw = CachedWikipedia(wiki_location, country_list)
    pages = qdb.questions_with_pages()

    errors = {}
    for pp in sorted(pages, key=lambda k: len(pages[k]),
                     reverse=True):
        # This bit of code needs to line up with the logic in qdb.py
        # to have the same logic as the page_by_count function
        if len(pages[pp]) < min_pages:
            continue

        if use_qb:
            train_questions = [x for x in pages[pp] if x.fold == "train"]
            question_text = u"\n".join(u" ".join(x.raw_words())
                                       for x in train_questions)
        else:
            question_text = u''

        if use_source:
            filename = '%s/%s' % (source_location, pp)
            if os.path.isfile(filename):
                try:
                    with gzip.open(filename, 'rb') as f:
                        # Decode assuming UTF-8 so unidecode below receives
                        # text rather than raw bytes.
                        source_text = f.read().decode('utf-8', 'ignore')
                except zlib.error:
                    # Record the failure and fall back to empty text so that
                    # source_text is always bound when it is used below.
                    errors[pp] = "Error reading %s" % filename
                    source_text = ''
            else:
                source_text = ''
        else:
            source_text = u''

        if use_wiki:
            wikipedia_text = cw[pp].content
        else:
            wikipedia_text = u""

        total_text = wikipedia_text
        total_text += "\n"
        total_text += question_text
        total_text += "\n"
        total_text += unidecode(source_text)

        yield pp, total_text
        doc_num += 1

        # Stop after yielding `limit` documents (a non-positive limit means
        # unlimited).
        if limit > 0 and doc_num >= limit:
            break

    print("ERRORS")
    print("----------------------------------------")
    for ii in errors:
        print("%s\t%s" % (ii, errors[ii]))
Example #2
import gzip
import os
import zlib

from unidecode import unidecode

# QuestionDatabase and CachedWikipedia are assumed to be imported from
# elsewhere in the surrounding project.


def text_iterator(use_wiki,
                  wiki_location,
                  use_qb,
                  qb_location,
                  use_source,
                  source_location,
                  limit=-1,
                  min_pages=0,
                  country_list='data/country_list.txt'):
    qdb = QuestionDatabase(qb_location)
    doc_num = 0

    cw = CachedWikipedia(wiki_location, country_list)
    pages = qdb.questions_with_pages()

    errors = {}
    for pp in sorted(pages, key=lambda k: len(pages[k]), reverse=True):
        # This bit of code needs to line up with the logic in qdb.py
        # to have the same logic as the page_by_count function
        if len(pages[pp]) < min_pages:
            continue

        if use_qb:
            train_questions = [x for x in pages[pp] if x.fold == "train"]
            question_text = u"\n".join(u" ".join(x.raw_words())
                                       for x in train_questions)
        else:
            question_text = u''

        if use_source:
            filename = '%s/%s' % (source_location, pp)
            if os.path.isfile(filename):
                try:
                    with gzip.open(filename, 'rb') as f:
                        # Decode assuming UTF-8 so unidecode below receives
                        # text rather than raw bytes.
                        source_text = f.read().decode('utf-8', 'ignore')
                except zlib.error:
                    # Record the failure and fall back to empty text so that
                    # source_text is always bound when it is used below.
                    errors[pp] = "Error reading %s" % filename
                    source_text = ''
            else:
                source_text = ''
        else:
            source_text = u''

        if use_wiki:
            wikipedia_text = cw[pp].content
        else:
            wikipedia_text = u""

        total_text = wikipedia_text
        total_text += "\n"
        total_text += question_text
        total_text += "\n"
        total_text += unidecode(source_text)

        yield pp, total_text
        doc_num += 1

        # Stop after yielding `limit` documents (a non-positive limit means
        # unlimited).
        if limit > 0 and doc_num >= limit:
            break

    print("ERRORS")
    print("----------------------------------------")
    for ii in errors:
        print("%s\t%s" % (ii, errors[ii]))
Example #3
import wikipedia

from time import sleep

# QuestionDatabase and CachedWikipedia are assumed to be imported from
# elsewhere in the surrounding project.  ConnectionError is the Python 3
# builtin (under Python 2 it would come from requests.exceptions).


def text_iterator(use_wiki, wiki_location, use_qb, qb_location, limit,
                  min_pages=0):
    qdb = QuestionDatabase(qb_location)
    doc_num = 0

    cw = CachedWikipedia(wiki_location)
    pages = qdb.questions_with_pages()

    errors = {}
    for pp in sorted(pages, key=lambda k: len(pages[k]),
                     reverse=True):
        # This bit of code needs to line up with the logic in qdb.py
        # to have the same logic as the page_by_count function
        if len(pages[pp]) < min_pages:
            continue

        if use_qb:
            train_questions = [x for x in pages[pp] if x.fold == "train"]
            question_text = u"\n".join(u" ".join(x.raw_words())
                                       for x in train_questions)
        else:
            question_text = u''

        if use_wiki:
            # Catch concrete failures rather than using a bare except, which
            # would also swallow KeyboardInterrupt.
            try:
                wiki_links = cw[pp].links
            except Exception:
                wiki_links = []

            try:
                wiki_categories = cw[pp].categories
            except Exception:
                wiki_categories = []

            # Default to empty text so wikipedia_text is always bound even
            # when a handler below records an error and falls through
            # instead of continuing.
            wikipedia_text = u""
            try:
                wikipedia_text = cw[pp].content + ' ' + \
                    ' '.join(wiki_links + wiki_categories)
            except wikipedia.exceptions.PageError:
                errors[pp] = "Not found"
                continue
            except wikipedia.exceptions.DisambiguationError:
                errors[pp] = "Disambiguation"
            except KeyError:
                errors[pp] = "KeyError"
            except ValueError:
                errors[pp] = "No JSON object could be decoded"
            except ConnectionError:
                # Back off for ten minutes in case the Wikipedia API is
                # temporarily unreachable, then continue with the next page.
                print("Connection error ... ")
                errors[pp] = "Connection error"
                sleep(600)
                print("done waiting")
        else:
            wikipedia_text = u""

        # Separate the two sources with a newline (as in the variants above)
        # so the last Wikipedia token and the first question token do not
        # run together.
        total_text = wikipedia_text
        total_text += "\n"
        total_text += question_text

        yield pp, total_text
        doc_num += 1

        # Stop after yielding `limit` documents (a non-positive limit means
        # unlimited).
        if limit > 0 and doc_num >= limit:
            break

    print("ERRORS")
    print("----------------------------------------")
    for ii in errors:
        print("%s\t%s" % (ii, errors[ii]))