Example #1
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if merge_point_found:  # append the remaining to unchanged
            unchanged_segments.append(seg)
        else:  # look for a merge point
            segments_to_merge.append((seg, i)) # merge every segment up to the merge point
            if i > 3 and total_docs < fib(i + 5):
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        return segments
Example #2
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if merge_point_found:  # append the remaining to unchanged
            unchanged_segments.append(seg)
        else:  # look for a merge point
            segments_to_merge.append(
                (seg, i))  # merge every segment up to the merge point
            if i > 3 and total_docs < fib(i + 5):
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        return segments
Example #3
0
def MERGE_CUSTOM(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader
    from whoosh.util import fib

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    log_stats = False

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if log_stats:
            logger.debug("%s: %s/%s, fib %s", i, count, total_docs, fib(i + 5))

        if merge_point_found:
            unchanged_segments.append(seg)
        else:
            segments_to_merge.append((seg, i))
            if i > 3 and total_docs < fib(i + 5):
                logger.debug("Merge point found at %s - %s", i, total_docs)
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            logger.info("Merging segment %s having size %s", i,
                        seg.doc_count_all())
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        logger.debug("No merge point found, no merge yet")
        return segments
Example #4
0
def MERGE_CUSTOM(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader
    from whoosh.util import fib

    unchanged_segments = []
    segments_to_merge = []

    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0

    log_stats = False

    merge_point_found = False
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count

        if log_stats:
            logger.debug("%s: %s/%s, fib %s", i, count, total_docs, fib(i+5))

        if merge_point_found:
            unchanged_segments.append(seg)
        else:
            segments_to_merge.append((seg, i))
            if i > 3 and total_docs < fib(i + 5):
                logger.debug("Merge point found at %s - %s", i, total_docs)
                merge_point_found = True

    if merge_point_found and len(segments_to_merge) > 1:
        for seg, i in segments_to_merge:
            logger.info("Merging segment %s having size %s", i, seg.doc_count_all())
            reader = SegmentReader(writer.storage, writer.schema, seg)
            writer.add_reader(reader)
            reader.close()
        return unchanged_segments
    else:
        logger.debug("No merge point found, no merge yet")
        return segments
Example #5
0
def MERGE_SMALL(ix, writer, segments):
    """This policy merges small segments, where small is
    defined using a heuristic based on the fibonacci sequence.
    """

    newsegments = SegmentSet()
    sorted_segment_list = sorted((s.doc_count_all(), s) for s in segments)
    total_docs = 0
    for i, (count, seg) in enumerate(sorted_segment_list):
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                writer.add_segment(ix, seg)
            else:
                newsegments.append(seg)
    return newsegments
Example #6
0
def MERGE_SMALL(ix, writer, segments):
    """This policy merges small segments, where small is
    defined using a heuristic based on the fibonacci sequence.
    """

    newsegments = index.SegmentSet()
    sorted_segment_list = sorted((s.doc_count_all(), s) for s in segments)
    total_docs = 0
    for i, (count, seg) in enumerate(sorted_segment_list):
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                writer.add_segment(ix, seg)
            else:
                newsegments.append(seg)
    return newsegments
Example #7
0
def MERGE_SMALL(ix, writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.filedb.filereading import SegmentReader
    newsegments = SegmentSet()
    sorted_segment_list = sorted((s.doc_count_all(), s) for s in segments)
    total_docs = 0
    for i, (count, seg) in enumerate(sorted_segment_list):
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                writer.add_reader(SegmentReader(ix.storage, seg, ix.schema))
            else:
                newsegments.append(seg)
    return newsegments
Example #8
0
def MERGE_SMALL(ix, writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.filedb.filereading import SegmentReader
    newsegments = SegmentSet()
    sorted_segment_list = sorted((s.doc_count_all(), s) for s in segments)
    total_docs = 0
    for i, (count, seg) in enumerate(sorted_segment_list):
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                writer.add_reader(SegmentReader(ix.storage, seg, ix.schema))
            else:
                newsegments.append(seg)
    return newsegments
Example #9
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.filedb.filereading import SegmentReader

    newsegments = []
    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                reader = SegmentReader(writer.storage, writer.schema, seg)
                writer.add_reader(reader)
                reader.close()
            else:
                newsegments.append(seg)
    return newsegments
Example #10
0
def MERGE_SMALL(writer, segments):
    """This policy merges small segments, where "small" is defined using a
    heuristic based on the fibonacci sequence.
    """

    from whoosh.reading import SegmentReader

    newsegments = []
    sorted_segment_list = sorted(segments, key=lambda s: s.doc_count_all())
    total_docs = 0
    for i, seg in enumerate(sorted_segment_list):
        count = seg.doc_count_all()
        if count > 0:
            total_docs += count
            if total_docs < fib(i + 5):
                reader = SegmentReader(writer.storage, writer.schema, seg)
                writer.add_reader(reader)
                reader.close()
            else:
                newsegments.append(seg)
    return newsegments