Python DawgBuilder.DawgBuilder Examples

Programming Language: Python

Namespace/Package Name: whoosh.support.dawg

Class/Type: DawgBuilder

Method/Function: DawgBuilder

Examples at hotexamples.com: 4

Python DawgBuilder.DawgBuilder - 4 examples found. These are the top-rated real-world Python examples of whoosh.support.dawg.DawgBuilder.DawgBuilder extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DawgBuilder(4)

insert(3)

write(3)

close(2)

Example #1

Show file

def test_dawg():
    # Build a field-rooted DAWG holding two words under the "test" field,
    # write it to a temporary file, then check that flattening the "test"
    # edge of the builder's root yields both words.
    from whoosh.support.dawg import DawgBuilder

    field_prefix = ["test"]
    expected_words = ["special", "specials"]

    with TempStorage() as storage:
        dawg_file = storage.create_file("test.dawg")

        builder = DawgBuilder(field_root=True)
        for word in ("special", "specials"):
            # Each key is the field name followed by the word's characters.
            builder.insert(field_prefix + list(word))
        builder.write(dawg_file)

        flattened = list(dawg.flatten(builder.root.edge("test")))
        assert_equal(flattened, expected_words)

Example #2

Show file

def add_spelling(ix, fieldnames, commit=True):
    """Adds spelling files to an existing index that was created without
    them, and modifies the schema so the given fields have the ``spelling``
    attribute. Only works on filedb indexes.

    >>> ix = index.open_dir("testindex")
    >>> add_spelling(ix, ["content", "tags"])

    :param ix: a :class:`whoosh.filedb.fileindex.FileIndex` object.
    :param fieldnames: a list of field names to create word graphs for.
    :param commit: if True (the default), the writer opened on ``ix`` is
        committed with ``merge=False`` after the word graph files have been
        written and the schema updated. If False, the writer is neither
        committed nor returned.
    """

    from whoosh.filedb.filereading import SegmentReader

    # The names are iterated once per segment and once more for the schema
    # update, so materialise them in case a one-shot iterable was passed.
    fieldnames = list(fieldnames)

    writer = ix.writer()
    storage = writer.storage
    schema = writer.schema

    for segment in writer.segments:
        reader = SegmentReader(storage, schema, segment)
        try:
            dawgfile = storage.create_file(segment.dawg_filename)
            dawg = DawgBuilder(field_root=True)
            for fieldname in fieldnames:
                # Keys are rooted at the field name, followed by the word's
                # characters.
                prefix = (fieldname, )
                for word in reader.lexicon(fieldname):
                    dawg.insert(prefix + tuple(word))
            dawg.write(dawgfile)
        finally:
            # Release the segment's reader even if building the graph fails;
            # the original left one reader open per segment.
            reader.close()

    for fieldname in fieldnames:
        schema[fieldname].spelling = True

    if commit:
        writer.commit(merge=False)

Example #3

Show file

    def __init__(self,
                 ix,
                 poolclass=None,
                 procs=0,
                 blocklimit=128,
                 timeout=0.0,
                 delay=0.1,
                 name=None,
                 _lk=True,
                 **poolargs):
        """Set up a writer that builds a new segment for the index ``ix``.

        Optionally acquires the index's write lock, reads the index's table
        of contents, and creates the files and helper writers for the new
        segment (terms, postings, optional vectors, stored fields, field
        lengths and, when any field has ``spelling`` set, a word graph).

        :param ix: the index object; this method uses its ``lock()`` and
            ``_read_toc()`` methods and its ``storage`` and ``indexname``
            attributes.
        :param poolclass: class used to create ``self.pool``. If None,
            ``MultiPool`` is used when ``procs > 1``, otherwise
            ``TempfilePool``.
        :param procs: passed to the pool class as ``procs``; also selects
            the default pool class (see ``poolclass``).
        :param blocklimit: stored as ``self.blocklimit`` and passed to the
            term postings ``FilePostingWriter``.
        :param timeout: passed to ``try_for`` when acquiring the write lock.
        :param delay: passed to ``try_for`` when acquiring the write lock.
        :param name: name for the new segment. If falsy, the name is
            ``Segment.basename(indexname, segment_number)``.
        :param _lk: if true, acquire the ``"WRITELOCK"`` lock on ``ix``
            before doing anything else.
        :param poolargs: extra keyword arguments forwarded to the pool class.
        :raises LockError: if ``_lk`` is true and ``try_for`` reports that
            the lock could not be acquired.
        """

        self.writelock = None
        if _lk:
            # Take the lock before reading the TOC or creating any files.
            self.writelock = ix.lock("WRITELOCK")
            if not try_for(
                    self.writelock.acquire, timeout=timeout, delay=delay):
                raise LockError

        # Snapshot of the index's current table of contents
        info = ix._read_toc()
        self.schema = info.schema
        self.segments = info.segments
        self.storage = storage = ix.storage
        self.indexname = ix.indexname
        self.is_closed = False

        self.blocklimit = blocklimit
        # The new segment and generation numbers are one past those in the TOC
        self.segment_number = info.segment_counter + 1
        self.generation = info.generation + 1

        # For each existing segment, record the running total of
        # doc_count_all() over the segments that precede it
        self._doc_offsets = []
        base = 0
        for s in self.segments:
            self._doc_offsets.append(base)
            base += s.doc_count_all()

        self.name = name or Segment.basename(self.indexname,
                                             self.segment_number)
        self.docnum = 0
        self.fieldlength_totals = defaultdict(int)
        self._added = False
        self._unique_cache = {}

        # Create a temporary segment to use its .*_filename attributes
        segment = Segment(self.name, self.generation, 0, None, None, None)

        # Spelling
        self.wordsets = {}
        self.dawg = None
        # The word graph file and builder are only created when at least one
        # schema field has spelling enabled. NOTE(review): self.dawgfile is
        # not assigned otherwise -- confirm no other method reads it
        # unconditionally.
        if any(field.spelling for field in self.schema):
            self.dawgfile = storage.create_file(segment.dawg_filename)
            self.dawg = DawgBuilder(field_root=True)

        # Terms index
        tf = storage.create_file(segment.termsindex_filename)
        ti = TermIndexWriter(tf)
        # Term postings file
        pf = storage.create_file(segment.termposts_filename)
        pw = FilePostingWriter(pf, blocklimit=blocklimit)
        # Terms writer (self.dawg is None when no field has spelling enabled)
        self.termswriter = TermsWriter(self.schema, ti, pw, self.dawg)

        if self.schema.has_vectored_fields():
            # Vector index
            vf = storage.create_file(segment.vectorindex_filename)
            self.vectorindex = TermVectorWriter(vf)

            # Vector posting file
            vpf = storage.create_file(segment.vectorposts_filename)
            self.vpostwriter = FilePostingWriter(vpf, stringids=True)
        else:
            # No vectored fields: no vector files are created
            self.vectorindex = None
            self.vpostwriter = None

        # Stored fields file
        sf = storage.create_file(segment.storedfields_filename)
        self.storedfields = StoredFieldWriter(sf, self.schema.stored_names())

        # Field lengths file
        self.lengthfile = storage.create_file(segment.fieldlengths_filename)

        # Create the pool
        if poolclass is None:
            if procs > 1:
                # Imported here so it is only loaded when more than one
                # process is requested
                from whoosh.filedb.multiproc import MultiPool
                poolclass = MultiPool
            else:
                poolclass = TempfilePool
        self.pool = poolclass(self.schema, procs=procs, **poolargs)

Example #4

Show file

 def _make_dawg_files(self):
     """Create the segment's DAWG file and store a builder for it.

     The file is created through ``self.segment.create_file`` using
     ``self.storage`` and ``StdCodec.DAWG_EXT``; the resulting file object
     is handed to a field-rooted ``DawgBuilder`` kept as ``self.dawg``.
     """
     dawg_output = self.segment.create_file(self.storage,
                                            StdCodec.DAWG_EXT)
     self.dawg = DawgBuilder(dawg_output, field_root=True)